From 5304402f05369d7205b1f028e3d6ba574979a035 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 21 Nov 2024 13:03:53 -0700 Subject: [PATCH 01/88] [SM6.9] Allow native vectors longer than 4 Remove errors in Sema diagnostics for vectors longer than 4 in 6.9. Test for failures using long vectors in unspported contexts and for correct codegen in supported contexts. Verify errors persist in pre-6.9 shader models The type buffer cache expects a max vector size of 4. By just skipping the cache for longer vectors, we don't overrun and store float7 vectors in the double3 slot or retrieve the double3 in place of float7. Testing is for acceptance, mangling and basic copying that takes place at the high level to ensure they are being accepted and recognized correctly. The intent is not to tully test the passing of data as that requires enabling vector operations to do properly. This test is used to verify that these same constructs are disallowed in 6.8 and earlier. A separate test verifies that disallowed contexts produce the appropriate errors Fixes #7117 --- tools/clang/lib/Sema/SemaHLSL.cpp | 12 +- .../CodeGenDXIL/hlsl/types/longvec_decls.hlsl | 263 ++++++++++++++++++ .../hlsl/types/invalid_longvecs_sm68.hlsl | 34 +++ 3 files changed, 306 insertions(+), 3 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ba0801dd52..69cd2a88e3 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -3928,7 +3928,9 @@ class HLSLExternalSource : public ExternalSemaSource { } QualType LookupVectorType(HLSLScalarType scalarType, unsigned int colCount) { - QualType qt = m_vectorTypes[scalarType][colCount - 1]; + QualType qt; + if (colCount < 4) + qt = m_vectorTypes[scalarType][colCount - 1]; if (qt.isNull()) { if (m_scalarTypes[scalarType].isNull()) { LookupScalarTypeDef(scalarType); @@ -3936,7 +3938,8 @@ class HLSLExternalSource : public ExternalSemaSource { qt = GetOrCreateVectorSpecialization(*m_context, m_sema, m_vectorTemplateDecl, m_scalarTypes[scalarType], colCount); - m_vectorTypes[scalarType][colCount - 1] = qt; + if (colCount < 4) + m_vectorTypes[scalarType][colCount - 1] = qt; } return qt; } @@ -5055,7 +5058,10 @@ class HLSLExternalSource : public ExternalSemaSource { bool CheckRangedTemplateArgument(SourceLocation diagLoc, llvm::APSInt &sintValue) { - if (!sintValue.isStrictlyPositive() || sintValue.getLimitedValue() > 4) { + const auto *SM = + hlsl::ShaderModel::GetByName(m_sema->getLangOpts().HLSLProfile.c_str()); + if (!sintValue.isStrictlyPositive() || + (sintValue.getLimitedValue() > 4 && !SM->IsSM69Plus())) { m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_4); return true; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl new file mode 100644 index 0000000000..d6672e7678 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl @@ -0,0 +1,263 @@ +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 
-DTYPE=int16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s + +// A test to verify that declarations of longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } +struct LongVec { + float4 f; + vector vec; +}; + + +// Just some dummies to capture the types and mangles. +// CHECK: @"\01?dummy@@3[[MNG:F|M|N|_N|_K|\$f16@]]A" = external addrspace(3) global [[STY]] +groupshared TYPE dummy; + +// CHECK-DAG: @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" = external addrspace(3) global <7 x [[STY]]> +// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$06@@A" = external addrspace(3) global [10 x <7 x [[STY]]>] +// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec +groupshared vector gs_vec; +groupshared vector gs_vec_arr[10]; +groupshared LongVec gs_vec_rec; + +// CHECK-DAG: @static_vec = internal global <7 x [[STY]]> +// CHECK-DAG: @static_vec_arr = internal global [10 x <7 x [[STY]]>] zeroinitializer +// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec +static vector static_vec; +static vector static_vec_arr[10]; +static LongVec static_vec_rec; + +// CHECK: define [[RTY:[a-z0-9]*]] @"\01?getVal@@YA[[MNG]][[MNG]]@Z"([[RTY]] {{.*}}%t) +export TYPE getVal(TYPE t) {TYPE ret = dummy; dummy = t; return ret;} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_param_passthru +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@V1@@Z"(<7 x [[RTY]]> %vec1) +// CHECK: ret <7 x [[RTY]]> +export vector lv_param_passthru(vector vec1) { + vector ret = vec1; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@AIAV1@@Z"(<7 x [[RTY]]> %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_in_out(in vector vec1, out vector vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout +// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$06@@0@Z"(<7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec1, align 4 +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec2, align 4 +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec1, align 4 +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_inout(inout vector vec1, inout vector vec2) { + vector tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_global_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* @static_vec +// CHECK: ret void +export void lv_global_assign(vector vec) { + static_vec = vec; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_global_ret +// 
CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* @static_vec +// CHECK: ret <7 x [[RTY]]> +export vector lv_global_ret() { + vector ret = static_vec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_gs_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" +// CHECK: ret void +export void lv_gs_assign(vector vec) { + gs_vec = vec; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_gs_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" +// CHECK: ret <7 x [[RTY]]> +export vector lv_gs_ret() { + vector ret = gs_vec; + return ret; +} + +#define DIMS 10 + +// CHECK-LABEL: define void @"\01?lv_param_arr_passthru +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@V1@@Z"([10 x <7 x [[STY]]>]* noalias sret %agg.result, <7 x [[RTY]]> %vec) +// Arrays are returned in the params +// CHECK: ret void +export vector lv_param_arr_passthru(vector vec)[10] { + vector ret[10]; + for (int i = 0; i < DIMS; i++) + ret[i] = vec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_global_arr_assign +// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) +// CHECK: ret void +export void lv_global_arr_assign(vector vec[10]) { + for (int i = 0; i < DIMS; i++) + static_vec_arr[i] = vec[i]; +} + +// CHECK-LABEL: define void @"\01?lv_global_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) +// Arrays are returned in the params +// CHECK: ret void +export vector lv_global_arr_ret()[10] { + vector ret[10]; + for (int i = 0; i < DIMS; i++) + ret[i] = static_vec_arr[i]; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_assign +// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) +// ret void +export void lv_gs_arr_assign(vector vec[10]) { + for (int i = 0; i < DIMS; i++) + gs_vec_arr[i] = vec[i]; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) +export vector lv_gs_arr_ret()[10] { + vector ret[10]; + for (int i = 0; i < DIMS; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// Aggregates are returned in the params +// CHECK: ret void +export LongVec lv_param_rec_passthru(LongVec vec) { + LongVec ret = vec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_global_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export void lv_global_rec_assign(LongVec vec) { + static_vec_rec = vec; +} + +// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// Aggregates are returned in the params +// CHECK: ret void +export LongVec lv_global_rec_ret() { + LongVec ret = static_vec_rec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export void lv_gs_rec_assign(LongVec vec) { + gs_vec_rec = vec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// Aggregates are 
returned in the params +// CHECK: ret void +export LongVec lv_gs_rec_ret() { + LongVec ret = gs_vec_rec; + return ret; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_splat +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) +// CHECK: ret <7 x [[RTY]]> +export vector lv_splat(TYPE scalar) { + vector ret = scalar; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@XZ"() +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist() { + vector ret = {1, 2, 3, 4, 5, 6}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@@Z"(<3 x [[RTY]]> %vec) +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist_vec(vector vec) { + vector ret = {vec, 4.0, 5.0, 6.0}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_vec_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@0@Z"(<3 x [[RTY]]> %vec1, <3 x [[RTY]]> %vec2) +// CHECK: ret <6 x [[RTY]]> +export vector lv_vec_vec(vector vec1, vector vec2) { + vector ret = {vec1, vec2}; + return ret; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_array_cast +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@Y06[[MNG]]@Z"([7 x [[STY]]]* %arr) +// CHECK: ret <7 x [[RTY]]> +export vector lv_array_cast(TYPE arr[NUM]) { + vector ret = (vector)arr; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_ctor +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@[[MNG]]@Z"([[RTY]] {{.*}}%s) +// CHECK: ret <6 x [[RTY]]> +export vector lv_ctor(TYPE s) { + vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); + return ret; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl new file mode 100644 index 0000000000..42eb6b077c --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc -T ps_6_8 -verify %s + +#define TYPE float +#define NUM 5 + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +}; +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +export vector lv_param_passthru( // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector vec1) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + arr[1]= vec1; + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 
and 4 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec1 = vec2; + vec2 = tmp; +} From e010223f74f9c2d96d51349849cd5f5e99542e99 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 5 Dec 2024 10:55:40 -1000 Subject: [PATCH 02/88] Produce errors for long vectors in invalid contexts Disallow long vectors, and arrays or structs containing long vectors in cbuffers, entry functions, node records, tessellation patchs, or special intrinsic parameters with user-defined struct parameters. --- tools/clang/include/clang/AST/HlslTypes.h | 6 +- tools/clang/include/clang/Basic/Attr.td | 12 ++ .../clang/Basic/DiagnosticSemaKinds.td | 4 +- tools/clang/include/clang/Sema/SemaHLSL.h | 2 + tools/clang/lib/AST/ASTContextHLSL.cpp | 19 ++- tools/clang/lib/Sema/SemaDXR.cpp | 6 + tools/clang/lib/Sema/SemaHLSL.cpp | 127 +++++++++++++++-- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 10 ++ .../hlsl/types/invalid_longvec_decls.hlsl | 132 ++++++++++++++++++ .../hlsl/types/invalid_longvec_decls_68.hlsl | 108 ++++++++++++++ .../hlsl/types/invalid_longvec_decls_hs.hlsl | 24 ++++ 11 files changed, 426 insertions(+), 24 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index d11fd598e6..2aa9afa5f9 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -370,12 +370,14 @@ void AddStdIsEqualImplementation(clang::ASTContext &context, clang::Sema &sema); clang::CXXRecordDecl *DeclareTemplateTypeWithHandle( clang::ASTContext &context, llvm::StringRef name, uint8_t templateArgCount = 1, - clang::TypeSourceInfo *defaultTypeArgValue = nullptr); + clang::TypeSourceInfo *defaultTypeArgValue = nullptr, + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef name, uint8_t templateArgCount, - clang::TypeSourceInfo *defaultTypeArgValue); + clang::TypeSourceInfo *defaultTypeArgValue, + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 6d2295dc4a..5ca9d4b333 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -992,6 +992,18 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLCBuffer : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLTessPatch : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + def HLSLNodeObject : InheritableAttr { let Spellings = []; // No spellings! 
let Subjects = SubjectList<[CXXRecord]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index f79b8f6045..c85f6a6863 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7691,8 +7691,6 @@ def err_hlsl_control_flow_cond_not_scalar : Error< "%0 statement conditional expressions must evaluate to a scalar">; def err_hlsl_unsupportedvectortype : Error< "%0 is declared with type %1, but only primitive scalar values are supported">; -def err_hlsl_unsupportedvectorsize : Error< - "%0 is declared with size %1, but only values 1 through 4 are supported">; def err_hlsl_unsupportedmatrixsize : Error< "%0 is declared with size %1x%2, but only values 1 through 4 are supported">; def err_hlsl_norm_float_only : Error< @@ -7843,6 +7841,8 @@ def err_hlsl_load_from_mesh_out_arrays: Error< "output arrays of a mesh shader can not be read from">; def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; +def err_hlsl_unsupported_long_vector: Error< + "Vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 40b030b430..c52131b8a5 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,6 +128,8 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); +bool HasLongVecs(const clang::QualType &qt); + bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 3c058950e0..978c97aeb5 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -903,18 +903,19 @@ void hlsl::AddStdIsEqualImplementation(clang::ASTContext &context, /// Number of template arguments (one or /// two). If assigned, the default /// argument for the element template. 
-CXXRecordDecl * -hlsl::DeclareTemplateTypeWithHandle(ASTContext &context, StringRef name, - uint8_t templateArgCount, - TypeSourceInfo *defaultTypeArgValue) { +CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandle( + ASTContext &context, StringRef name, uint8_t templateArgCount, + TypeSourceInfo *defaultTypeArgValue, InheritableAttr *Attr) { return DeclareTemplateTypeWithHandleInDeclContext( context, context.getTranslationUnitDecl(), name, templateArgCount, - defaultTypeArgValue); + defaultTypeArgValue, Attr); } CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef name, - uint8_t templateArgCount, TypeSourceInfo *defaultTypeArgValue) { + uint8_t templateArgCount, TypeSourceInfo *defaultTypeArgValue, + InheritableAttr *Attr) { + DXASSERT(templateArgCount != 0, "otherwise caller should be creating a class or struct"); DXASSERT(templateArgCount <= 2, "otherwise the function needs to be updated " @@ -968,6 +969,9 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( typeDeclBuilder.addField("h", elementType); + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + return typeDeclBuilder.getRecordDecl(); } @@ -1131,6 +1135,9 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLCBufferAttr::CreateImplicit(context)); + typeDeclBuilder.getRecordDecl(); return templateRecordDecl; diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 6d838fb203..cb16ced5df 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,6 +810,12 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } + if (hlsl::HasLongVecs(Payload->getType())) { + S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "payload parameters"; + return; + } + CollectNonAccessableFields(PayloadType, CallerStage, {}, {}, NonWriteableFields, NonReadableFields); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 69cd2a88e3..c5a30e00fa 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -3733,10 +3733,14 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); + InheritableAttr *Attr = nullptr; + if (kind == AR_OBJECT_INPUTPATCH || kind == AR_OBJECT_OUTPUTPATCH) + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context); + TypeSourceInfo *typeDefault = TemplateHasDefaultType(kind) ? float4TypeSourceInfo : nullptr; recordDecl = DeclareTemplateTypeWithHandle( - *m_context, typeName, templateArgCount, typeDefault); + *m_context, typeName, templateArgCount, typeDefault, Attr); } m_objectTypeDecls[i] = recordDecl; m_objectTypeDeclsMap[i] = std::make_pair(recordDecl, i); @@ -4896,10 +4900,6 @@ class HLSLExternalSource : public ExternalSemaSource { AR_BASIC_UNKNOWN; } - /// Checks whether the specified value is a valid vector - /// size. - bool IsValidVectorSize(size_t length) { return 1 <= length && length <= 4; } - /// Checks whether the specified value is a valid matrix row or /// column size. 
bool IsValidMatrixColOrRowSize(size_t length) { @@ -4935,11 +4935,6 @@ class HLSLExternalSource : public ExternalSemaSource { false); } else if (objectKind == AR_TOBJ_VECTOR) { bool valid = true; - if (!IsValidVectorSize(GetHLSLVecSize(type))) { - valid = false; - m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectorsize) - << type << GetHLSLVecSize(type); - } if (!IsScalarType(GetMatrixOrVectorElementType(type))) { valid = false; m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectortype) @@ -5085,11 +5080,12 @@ class HLSLExternalSource : public ExternalSemaSource { return false; } // Allow object type for Constant/TextureBuffer. - if (templateName == "ConstantBuffer" || templateName == "TextureBuffer") { + if (Template->getTemplatedDecl()->hasAttr()) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); - DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "cbuffer with non-type template arg"); QualType argType = arg.getAsType(); SourceLocation argSrcLoc = argLoc.getLocation(); if (IsScalarType(argType) || IsVectorType(m_sema, argType) || @@ -5099,6 +5095,12 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } + if (HasLongVecs(argType)) { + m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) + << "cbuffers"; + return true; + } + if (auto *TST = dyn_cast(argType)) { // This is a bit of a special case we need to handle. Because the // buffer types don't use their template parameter in a way that would @@ -5182,8 +5184,20 @@ class HLSLExternalSource : public ExternalSemaSource { return true; } return false; + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() == 1, + "Tessellation patch has more than one template arg"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + QualType argType = arg.getAsType(); + if (HasLongVecs(argType)) { + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "tessellation patches"; + return true; + } } - bool isMatrix = Template->getCanonicalDecl() == m_matrixTemplateDecl->getCanonicalDecl(); bool isVector = Template->getCanonicalDecl() == @@ -11423,10 +11437,17 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, HLSLExternalSource *source = HLSLExternalSource::FromSema(self); ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); switch (shapeKind) { + case AR_TOBJ_VECTOR: + if (GetHLSLVecSize(ArgTy) > 4) { + self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) + << "node records"; + Empty = false; + return false; + } + LLVM_FALLTHROUGH; case AR_TOBJ_ARRAY: case AR_TOBJ_BASIC: case AR_TOBJ_MATRIX: - case AR_TOBJ_VECTOR: Empty = false; return false; case AR_TOBJ_OBJECT: @@ -11888,6 +11909,33 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } +bool hlsl::HasLongVecs(const QualType &qt) { + if (qt.isNull()) { + return false; + } + + if (IsHLSLVecType(qt)) { + if (GetHLSLVecSize(qt) > 4) + return true; + } else if (qt->isArrayType()) { + const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); + return HasLongVecs(arrayType->getElementType()); + } else if (qt->isStructureOrClassType()) { + const RecordType *recordType = qt->getAs(); + const RecordDecl *recordDecl = 
recordType->getDecl(); + if (recordDecl->isInvalidDecl()) + return false; + RecordDecl::field_iterator begin = recordDecl->field_begin(); + RecordDecl::field_iterator end = recordDecl->field_end(); + for (; begin != end; begin++) { + const FieldDecl *fieldDecl = *begin; + if (HasLongVecs(fieldDecl->getType())) + return true; + } + } + return false; +} + bool hlsl::IsConversionToLessOrEqualElements( clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, bool explicitConversion) { @@ -14211,6 +14259,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, *pDispatchGrid = nullptr, *pMaxDispatchGrid = nullptr; bool usageIn = false; bool usageOut = false; + bool isGroupShared = false; for (clang::AttributeList *pAttr = D.getDeclSpec().getAttributes().getList(); pAttr != NULL; pAttr = pAttr->getNext()) { @@ -14234,6 +14283,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGroupShared: + isGroupShared = true; if (!isGlobal) { Diag(pAttr->getLoc(), diag::err_hlsl_varmodifierna) << pAttr->getName() << declarationType << pAttr->getRange(); @@ -14514,6 +14564,12 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } + // Disallow long vecs from cbuffers. + if (isGlobal && !isStatic && !isGroupShared && HasLongVecs(qt)) { + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << "cbuffers"; + result = false; + } + // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN // Validate that Vulkan specific feature is only used when targeting SPIR-V @@ -15402,6 +15458,16 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { return false; } +// Verify that user-defined intrinsic struct args contain no long vectors +static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { + if (HasLongVecs(Arg->getType())) { + S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) + << "user-defined struct parameter"; + return true; + } + return false; +} + static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 0); @@ -15419,6 +15485,12 @@ static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { auto attr = FDecl->getAttr(); + if (!attr) + return false; + + if (!IsBuiltinTable(attr->getGroup())) + return false; + switch (hlsl::IntrinsicOp(attr->getOpcode())) { case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want @@ -15430,6 +15502,22 @@ bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { // existing ones. See the ExtensionTest.EvalAttributeCollision test. 
assert(FDecl->getName() == "GetAttributeAtVertex"); return CheckIntrinsicGetAttributeAtVertex(this, FDecl, TheCall); + case hlsl::IntrinsicOp::IOP_DispatchMesh: + assert(TheCall->getNumArgs() > 3); + assert(FDecl->getName() == "DispatchMesh"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(3)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_CallShader: + assert(TheCall->getNumArgs() > 1); + assert(FDecl->getName() == "CallShader"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(1)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_TraceRay: + assert(TheCall->getNumArgs() > 7); + assert(FDecl->getName() == "TraceRay"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(7)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_ReportHit: + assert(TheCall->getNumArgs() > 2); + assert(FDecl->getName() == "ReportHit"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(2)->IgnoreCasts()); default: break; } @@ -16110,6 +16198,17 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { return; } + // Check general parameter characteristics + // Would be nice to check for resources here as they crash the compiler now. + for (const auto *param : FD->params()) + if (HasLongVecs(param->getType())) + S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "entry function parameters"; + + if (HasLongVecs(FD->getReturnType())) + S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "entry function return type"; + DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); llvm::StringRef StageName = shaderAttr->getStage(); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index cf5d741541..ee5ea567ce 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -520,6 +520,16 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << hullPatchCount.value(); } } + for (const auto *param : pPatchFnDecl->params()) + if (HasLongVecs(param->getType())) + self->Diag(param->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "patch constant function parameters"; + + if (HasLongVecs(pPatchFnDecl->getReturnType())) + self->Diag(pPatchFnDecl->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "patch constant function return type"; } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl new file mode 100644 index 0000000000..ae52983772 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl @@ -0,0 +1,132 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_9 -verify %s + +struct [raypayload] LongVec { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +struct LongVecParm { + float f; + float4 tar2 : SV_Target2; + vector vec; +}; + +vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +LongVec global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +cbuffer BadBuffy { + vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + LongVec cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are 
not supported}} +}; + +tbuffer BadTuffy { + vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + LongVec tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +}; + +ConstantBuffer< LongVec > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TextureBuffer< LongVec > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} + vector vec : V, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + LongVecParm parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + parm.f = vec; // expected-warning {{implicit truncation of vector type}} + parm.tar2 = vec; // expected-warning {{implicit truncation of vector type}} + return vec; // expected-warning {{implicit truncation of vector type}} +} + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + +void PatchConstantFunction(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + +RaytracingAccelerationStructure RTAS; + +[shader("raygeneration")] +void raygen() { + LongVec p = (LongVec)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("closesthit")] +void closesthit(inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in LongVec attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("anyhit")] +void AnyHit( inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in LongVec attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +{ +} + +[shader("miss")] +void Miss(inout LongVec payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{Vectors of over 4 elements in 
user-defined struct parameter are not supported}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + LongVec attr = (LongVec)0; + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("callable")] +void callable1(inout LongVec p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +groupshared LongVec as_pld; + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + DispatchMesh(1,1,1,as_pld); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +struct LongVecRec { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are not supported}} + NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} +{ + ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl new file mode 100644 index 0000000000..8aac527c1f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl @@ -0,0 +1,108 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_8 -verify %s + +// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } +struct LongVec { + float4 f; + vector vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} +}; + +static vector static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} +static vector static_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + +groupshared vector gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + +export vector lv_param_passthru(vector vec1) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export void lv_param_in_out(in vector vec1, out vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, inout vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error 
{{invalid value, valid range is between 1 and 4 inclusive}} + vector tmp = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vec1 = vec2; + vec2 = tmp; +} + +export void lv_global_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + static_vec = vec; +} + +export vector lv_global_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export void lv_gs_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + gs_vec = vec; +} + +export vector lv_gs_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_param_arr_passthru(vector vec)[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + ret[i] = vec; + return ret; +} + +export void lv_global_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + static_vec_arr[i] = vec[i]; +} + +export vector lv_global_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + ret[i] = static_vec_arr[i]; + return ret; +} + +export void lv_gs_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + gs_vec_arr[i] = vec[i]; +} + +export vector lv_gs_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +export vector lv_splat(TYPE scalar) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = scalar; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_initlist() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = {1, 2, 3, 4, 5, 6}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_initlist_vec(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = {vec, 4.0, 5.0, 6.0}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_vec_vec(vector vec1, vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = {vec1, vec2}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_array_cast(TYPE arr[NUM]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = (vector)arr; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_ctor(TYPE s) { // 
expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl new file mode 100644 index 0000000000..185233ad0f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T hs_6_9 -verify %s + +struct HsConstantData { + float Edges[3] : SV_TessFactor; + vector vec; +}; + +struct LongVec { + float4 f; + vector vec; +}; + +HsConstantData PatchConstantFunction( // expected-error{{Vectors of over 4 elements in patch constant function return type are not supported}} + vector vec : V, // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} + LongVec lv : L) { // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} + return (HsConstantData)0; +} + +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void main() { +} From cd72abec4341d7de07fbd7f7807f145b0960134a Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 18 Feb 2025 11:17:17 -0700 Subject: [PATCH 03/88] fix assert for tesselation patch template args This got lost somewhere --- tools/clang/lib/Sema/SemaHLSL.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index c5a30e00fa..aea960f2e8 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5185,8 +5185,8 @@ class HLSLExternalSource : public ExternalSemaSource { } return false; } else if (Template->getTemplatedDecl()->hasAttr()) { - DXASSERT(TemplateArgList.size() == 1, - "Tessellation patch has more than one template arg"); + DXASSERT(TemplateArgList.size() > 0, + "Tessellation patch should have at least one template args"); const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); From 1f12a3f08fd896ee005170c8ff7025f3de204950 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 22:33:24 -0700 Subject: [PATCH 04/88] Refactor builtin type detection with attributes Expand resource attribute to all resource types by adding reskind and resclass arguments indicating the specific resource type. Change detection in HlslTypes to use these attribute arguments. Similarly add vertex number arguments to output stream attribute and a boolean indicator of input or output for tessellation patches. 
Add geomstream attr to detect those objects Use attribute to detect tesselation patches Removes template arg counts and startswith stirngs to identify tesslations patches and distinguish them from multisampled textures --- tools/clang/include/clang/AST/HlslTypes.h | 7 +- tools/clang/include/clang/Basic/Attr.td | 6 +- tools/clang/lib/AST/ASTContextHLSL.cpp | 28 ++-- tools/clang/lib/AST/HlslTypes.cpp | 190 +++++----------------- tools/clang/lib/Sema/SemaHLSL.cpp | 68 ++++++-- 5 files changed, 121 insertions(+), 178 deletions(-) diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 2aa9afa5f9..9aeb97d3ee 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -350,7 +350,8 @@ void AddHLSLNodeOutputRecordTemplate( clang::CXXRecordDecl *DeclareRecordTypeWithHandle(clang::ASTContext &context, llvm::StringRef name, - bool isCompleteType = true); + bool isCompleteType = true, + clang::InheritableAttr *Attr = nullptr); void AddRaytracingConstants(clang::ASTContext &context); void AddSamplerFeedbackConstants(clang::ASTContext &context); @@ -382,11 +383,11 @@ clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 7304bba06e..e344e7b851 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -993,14 +993,16 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { } -def HLSLCBuffer : InheritableAttr { +def HLSLTessPatch : InheritableAttr { let Spellings = []; // No spellings! + let Args = [BoolArgument<"IsInput">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; } -def HLSLTessPatch : InheritableAttr { +def HLSLStreamOutput : InheritableAttr { let Spellings = []; // No spellings! 
+ let Args = [UnsignedArgument<"Vertices">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; } diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 978c97aeb5..e71f37b663 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -525,11 +525,15 @@ hlsl::DeclareRecordTypeWithHandleAndNoMemberFunctions(ASTContext &context, /// CXXRecordDecl * hlsl::DeclareRecordTypeWithHandle(ASTContext &context, StringRef name, - bool isCompleteType /*= true */) { + bool isCompleteType /*= true */, + InheritableAttr *Attr) { BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), name, TagDecl::TagKind::TTK_Struct); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + if (isCompleteType) return typeDeclBuilder.completeDefinition(); return typeDeclBuilder.getRecordDecl(); @@ -939,11 +943,9 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( QualType elementType = context.getTemplateTypeParmType( /*templateDepth*/ 0, 0, ParameterPackFalse, elementTemplateParamDecl); - if (templateArgCount > 1 && - // Only need array type for inputpatch and outputpatch. - // Avoid Texture2DMS which may use 0 count. - // TODO: use hlsl types to do the check. - !name.startswith("Texture") && !name.startswith("RWTexture")) { + // Only need array type for inputpatch and outputpatch. + if (Attr && isa(Attr)) { + DXASSERT(templateArgCount == 2, "Tess patches need 2 template params"); Expr *countExpr = DeclRefExpr::Create( context, NestedNameSpecifierLoc(), NoLoc, countTemplateParamDecl, false, DeclarationNameInfo(countTemplateParamDecl->getDeclName(), NoLoc), @@ -1099,22 +1101,25 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandle( ASTContext &context, StringRef typeName, StringRef templateParamName, - TagTypeKind tagKind) { + InheritableAttr *Attr) { return DeclareUIntTemplatedTypeWithHandleInDeclContext( context, context.getTranslationUnitDecl(), typeName, templateParamName, - tagKind); + Attr); } CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, - StringRef templateParamName, TagTypeKind tagKind) { + StringRef templateParamName, InheritableAttr *Attr) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, tagKind); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, TagTypeKind::TTK_Class); typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + return typeDeclBuilder.getRecordDecl(); } @@ -1136,7 +1141,8 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
typeDeclBuilder.getRecordDecl()->addAttr( - HLSLCBufferAttr::CreateImplicit(context)); + HLSLResourceAttr::CreateImplicit(context, (unsigned)DXIL::ResourceKind::CBuffer, + (unsigned)DXIL::ResourceClass::CBuffer)); typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index d83b307463..5f7e93fbee 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -474,160 +474,73 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { QualType elemTy = arg0.getAsType(); return elemTy; } + + +template +static AttrType *getAttr(clang::QualType type) { + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr(); + } + return nullptr; +} + // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "InputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return Attr->getIsInput(); return false; } + bool IsHLSLOutputPatchType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "OutputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return !Attr->getIsInput(); return false; } + bool IsHLSLPointStreamType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getVertices() == 1; return false; } + bool IsHLSLLineStreamType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "LineStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getVertices() == 2; return false; } + bool IsHLSLTriangleStreamType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getVertices() == 3; return false; } + bool IsHLSLStreamOutputType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - if (templateDecl->getName() == "LineStream") - return true; - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (getAttr(type)) + return true; return false; } 
-bool IsHLSLResourceType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "Texture1D" || name == "RWTexture1D") - return true; - if (name == "Texture2D" || name == "RWTexture2D") - return true; - if (name == "Texture2DMS" || name == "RWTexture2DMS") - return true; - if (name == "Texture3D" || name == "RWTexture3D") - return true; - if (name == "TextureCube" || name == "RWTextureCube") - return true; - - if (name == "Texture1DArray" || name == "RWTexture1DArray") - return true; - if (name == "Texture2DArray" || name == "RWTexture2DArray") - return true; - if (name == "Texture2DMSArray" || name == "RWTexture2DMSArray") - return true; - if (name == "TextureCubeArray" || name == "RWTextureCubeArray") - return true; - - if (name == "FeedbackTexture2D" || name == "FeedbackTexture2DArray") - return true; - - if (name == "RasterizerOrderedTexture1D" || - name == "RasterizerOrderedTexture2D" || - name == "RasterizerOrderedTexture3D" || - name == "RasterizerOrderedTexture1DArray" || - name == "RasterizerOrderedTexture2DArray" || - name == "RasterizerOrderedBuffer" || - name == "RasterizerOrderedByteAddressBuffer" || - name == "RasterizerOrderedStructuredBuffer") - return true; - - if (name == "ByteAddressBuffer" || name == "RWByteAddressBuffer") - return true; - - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - - if (name == "Buffer" || name == "RWBuffer") - return true; - - if (name == "SamplerState" || name == "SamplerComparisonState") - return true; - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - - if (name == "RaytracingAccelerationStructure") - return true; - } +bool IsHLSLResourceType(clang::QualType type) { + if (getAttr(type)) + return true; return false; } -static HLSLNodeObjectAttr *getNodeAttr(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr(); - } - return nullptr; -} - DXIL::NodeIOKind GetNodeIOType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return Attr->getNodeIOType(); return DXIL::NodeIOKind::Invalid; } @@ -654,27 +567,20 @@ bool IsHLSLDynamicSamplerType(clang::QualType type) { } bool IsHLSLNodeType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return true; return false; } bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer; return false; } bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - // Read-only records - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResClass() == 
(unsigned)DXIL::ResourceClass::CBuffer; return false; } @@ -701,14 +607,8 @@ bool IsHLSLNodeOutputType(clang::QualType type) { } bool IsHLSLStructuredBufferType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResKind() == (unsigned)DXIL::ResourceKind::StructuredBuffer; return false; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 0665d7441e..57eb388893 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -363,6 +363,8 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) + #define IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) #define IS_BPROP_TEXTURE(_Props) (((_Props)&BPROP_TEXTURE) != 0) @@ -616,6 +618,8 @@ C_ASSERT(ARRAYSIZE(g_uBasicKindProps) == AR_BASIC_MAXIMUM_COUNT); #define IS_BASIC_STREAM(_Kind) IS_BPROP_STREAM(GetBasicKindProps(_Kind)) +#define IS_BASIC_PATCH(_Kind) IS_BPROP_PATCH(GetBasicKindProps(_Kind)) + #define IS_BASIC_SAMPLER(_Kind) IS_BPROP_SAMPLER(GetBasicKindProps(_Kind)) #define IS_BASIC_TEXTURE(_Kind) IS_BPROP_TEXTURE(GetBasicKindProps(_Kind)) #define IS_BASIC_OBJECT(_Kind) IS_BPROP_OBJECT(GetBasicKindProps(_Kind)) @@ -3540,6 +3544,20 @@ class HLSLExternalSource : public ExternalSemaSource { if (kind == AR_OBJECT_LEGACY_EFFECT) effectKindIndex = i; + InheritableAttr *Attr = nullptr; + if (IS_BASIC_STREAM(kind)) + Attr = + HLSLStreamOutputAttr::CreateImplicit(*m_context, + kind - AR_OBJECT_POINTSTREAM + 1); + else if (IS_BASIC_PATCH(kind)) + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, kind == AR_OBJECT_INPUTPATCH); + else { + DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; + DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; + if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) + Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, + (unsigned)ResClass); + } DXASSERT(kind < _countof(g_ArBasicTypeNames), "g_ArBasicTypeNames has the wrong number of entries"); assert(kind < _countof(g_ArBasicTypeNames)); @@ -3609,10 +3627,10 @@ class HLSLExternalSource : public ExternalSemaSource { } } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2D", "kind"); + *m_context, "FeedbackTexture2D", "kind", Attr); } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D_ARRAY) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2DArray", "kind"); + *m_context, "FeedbackTexture2DArray", "kind", Attr); } else if (kind == AR_OBJECT_EMPTY_NODE_INPUT) { recordDecl = DeclareNodeOrRecordType( *m_context, DXIL::NodeIOKind::EmptyInput, @@ -3729,20 +3747,11 @@ class HLSLExternalSource : public ExternalSemaSource { #endif else if (templateArgCount == 0) { recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, - /*isCompleteType*/ false); + /*isCompleteType*/ false, + Attr); } else { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); - - InheritableAttr *Attr = nullptr; - DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; - DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; - if 
(GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) - Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, - (unsigned)ResClass); - else if (kind == AR_OBJECT_INPUTPATCH || kind == AR_OBJECT_OUTPUTPATCH) - Attr = HLSLTessPatchAttr::CreateImplicit(*m_context); - TypeSourceInfo *typeDefault = TemplateHasDefaultType(kind) ? float4TypeSourceInfo : nullptr; recordDecl = DeclareTemplateTypeWithHandle( @@ -4755,6 +4764,15 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::FeedbackTexture2DArray; ResClass = DXIL::ResourceClass::SRV; return true; + case AR_OBJECT_SAMPLER: + case AR_OBJECT_SAMPLERCOMPARISON: + ResKind = DXIL::ResourceKind::Sampler; + ResClass = DXIL::ResourceClass::Sampler; + return true; + case AR_OBJECT_ACCELERATION_STRUCT: + ResKind = DXIL::ResourceKind::RTAccelerationStructure; + ResClass = DXIL::ResourceClass::SRV; + return true; default: return false; } @@ -5217,7 +5235,9 @@ class HLSLExternalSource : public ExternalSemaSource { return false; } // Allow object type for Constant/TextureBuffer. - if (Template->getTemplatedDecl()->hasAttr()) { + HLSLResourceAttr *ResAttr = + Template->getTemplatedDecl()->getAttr(); + if (ResAttr && ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); @@ -5326,7 +5346,8 @@ class HLSLExternalSource : public ExternalSemaSource { "Tessellation patch should have at least one template args"); const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); - DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Tessellation patch requires type template arg 0"); QualType argType = arg.getAsType(); if (HasLongVecs(argType)) { m_sema->Diag(argLoc.getLocation(), @@ -5334,7 +5355,22 @@ class HLSLExternalSource : public ExternalSemaSource { << "tessellation patches"; return true; } + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Geometry streams should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Geometry stream requires type template arg 0"); + QualType argType = arg.getAsType(); + if (HasLongVecs(argType)) { + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "geometry streams"; + return true; + } } + bool isMatrix = Template->getCanonicalDecl() == m_matrixTemplateDecl->getCanonicalDecl(); bool isVector = Template->getCanonicalDecl() == @@ -5354,8 +5390,6 @@ class HLSLExternalSource : public ExternalSemaSource { // NOTE: IsValidTemplateArgumentType emits its own diagnostics return true; } - HLSLResourceAttr *ResAttr = - Template->getTemplatedDecl()->getAttr(); if (ResAttr && IsTyped((DXIL::ResourceKind)ResAttr->getResKind())) { // Check vectors for being too large. if (IsVectorType(m_sema, argType)) { From de6ac33353314da64d9f56154d7a428fcd2f320e Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 20 Feb 2025 19:11:31 -0700 Subject: [PATCH 05/88] Respond to feedback Add setting for max vec size. Determine long vector presence using DefinitionData bit? OR Rename testing for long vecs function? Add attribute for geometry streams, produce and test errors for long vectors there. 
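In outline, the cap is chosen once from the target profile, carried on LangOptions, and consulted wherever vector sizes are validated. A minimal sketch of that flow, using the constants added in this patch; the two helper names are illustrative only (the real logic lives in dxcompilerobj.cpp and the ranged template-argument check), so treat this as a sketch rather than the patch's code:

    #include "dxc/DXIL/DxilConstants.h"
    #include "dxc/DXIL/DxilShaderModel.h"
    #include "llvm/ADT/APSInt.h"

    // Illustrative helper: SM 6.9+ raises the cap to 1024 elements,
    // everything earlier keeps the legacy limit of 4.
    static unsigned PickMaxVectorLength(const hlsl::ShaderModel *SM) {
      return SM->IsSM69Plus() ? hlsl::DXIL::kSM69MaxVectorLength
                              : hlsl::DXIL::kDefaultMaxVectorLength;
    }

    // Illustrative helper mirroring the ranged template-argument check:
    // a vector size template argument must be 1..MaxLength inclusive.
    static bool IsValidVectorSize(const llvm::APSInt &Size, unsigned MaxLength) {
      return Size.isStrictlyPositive() && Size.getLimitedValue() <= MaxLength;
    }
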
Add and test errors for > 1024 element vectors. Add vector size to error messages good test changes --- include/dxc/DXIL/DxilConstants.h | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 8 ++- tools/clang/include/clang/Basic/LangOptions.h | 3 +- tools/clang/include/clang/Sema/SemaHLSL.h | 2 +- tools/clang/lib/Sema/SemaDXR.cpp | 5 +- tools/clang/lib/Sema/SemaHLSL.cpp | 65 ++++++++++--------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 7 +- .../clang/tools/dxcompiler/dxcompilerobj.cpp | 7 ++ 8 files changed, 59 insertions(+), 40 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index f8d5b740f7..ac894df1d6 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -147,6 +147,8 @@ const unsigned kMaxMSTotalSigRows = 32; const unsigned kMaxMSSMSize = 1024 * 28; const unsigned kMinWaveSize = 4; const unsigned kMaxWaveSize = 128; +const unsigned kDefaultMaxVectorLength = 4; +const unsigned kSM69MaxVectorLength = 1024; const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 441509d4c5..4d81b25ccc 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7519,8 +7519,8 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_4: Error< - "invalid value, valid range is between 1 and 4 inclusive">; +def err_hlsl_invalid_range_1_plus: Error< + "invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; def err_hlsl_matrix_member_empty: Error< @@ -7852,7 +7852,9 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector: Error< - "Vectors of over 4 elements in %0 are not supported">; + "Vectors of over %0 elements in %1 are not supported">; +def err_hlsl_vector_too_long: Error< + "Vectors of over %0 elements in are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/include/clang/Basic/LangOptions.h b/tools/clang/include/clang/Basic/LangOptions.h index 8dc15da5d8..433b767c8d 100644 --- a/tools/clang/include/clang/Basic/LangOptions.h +++ b/tools/clang/include/clang/Basic/LangOptions.h @@ -15,7 +15,7 @@ #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H #define LLVM_CLANG_BASIC_LANGOPTIONS_H -#include "dxc/DXIL/DxilConstants.h" // For DXIL::DefaultLinkage +#include "dxc/DXIL/DxilConstants.h" // For DXIL:: default values. #include "dxc/Support/HLSLVersion.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/LLVM.h" @@ -168,6 +168,7 @@ class LangOptions : public LangOptionsBase { hlsl::DXIL::DefaultLinkage::Default; /// Whether use row major as default matrix major. 
bool HLSLDefaultRowMajor = false; + unsigned MaxHLSLVectorLength = hlsl::DXIL::kDefaultMaxVectorLength; // HLSL Change Ends bool SPIRV = false; // SPIRV Change diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index c52131b8a5..786f82933d 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool HasLongVecs(const clang::QualType &qt); +bool ContainsVectorLongerThan(const clang::QualType &qt, unsigned length); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index cb16ced5df..07234554e2 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,9 +810,10 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (hlsl::HasLongVecs(Payload->getType())) { + if (hlsl::ContainsVectorLongerThan(Payload->getType(), + DXIL::kDefaultMaxVectorLength)) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "payload parameters"; + << DXIL::kDefaultMaxVectorLength << "payload parameters"; return; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 57eb388893..fe3390a89e 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5207,12 +5207,13 @@ class HLSLExternalSource : public ExternalSemaSource { SourceLocation Loc); bool CheckRangedTemplateArgument(SourceLocation diagLoc, - llvm::APSInt &sintValue) { - const auto *SM = - hlsl::ShaderModel::GetByName(m_sema->getLangOpts().HLSLProfile.c_str()); + llvm::APSInt &sintValue, bool IsVector) { + unsigned MaxLength = DXIL::kDefaultMaxVectorLength; + if (IsVector) + MaxLength = m_sema->getLangOpts().MaxHLSLVectorLength; if (!sintValue.isStrictlyPositive() || - (sintValue.getLimitedValue() > 4 && !SM->IsSM69Plus())) { - m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_4); + sintValue.getLimitedValue() > MaxLength) { + m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_plus) << MaxLength; return true; } @@ -5252,9 +5253,9 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (HasLongVecs(argType)) { + if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << "cbuffers"; + << DXIL::kDefaultMaxVectorLength << "cbuffers"; return true; } @@ -5349,10 +5350,10 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Tessellation patch requires type template arg 0"); QualType argType = arg.getAsType(); - if (HasLongVecs(argType)) { + if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "tessellation patches"; + << DXIL::kDefaultMaxVectorLength << "tessellation patches"; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5363,10 +5364,10 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Geometry stream requires type template arg 0"); QualType argType = arg.getAsType(); - if (HasLongVecs(argType)) { + if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { 
m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "geometry streams"; + << DXIL::kDefaultMaxVectorLength << "geometry streams"; return true; } } @@ -5419,17 +5420,16 @@ class HLSLExternalSource : public ExternalSemaSource { llvm::APSInt constantResult; if (expr != nullptr && expr->isIntegerConstantExpr(constantResult, *m_context)) { - if (CheckRangedTemplateArgument(argSrcLoc, constantResult)) { + if (CheckRangedTemplateArgument(argSrcLoc, constantResult, + isVector)) return true; - } } } } else if (arg.getKind() == TemplateArgument::ArgKind::Integral) { if (isMatrix || isVector) { llvm::APSInt Val = arg.getAsIntegral(); - if (CheckRangedTemplateArgument(argSrcLoc, Val)) { + if (CheckRangedTemplateArgument(argSrcLoc, Val, isVector)) return true; - } } } } @@ -11633,9 +11633,9 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); switch (shapeKind) { case AR_TOBJ_VECTOR: - if (GetHLSLVecSize(ArgTy) > 4) { + if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "node records"; + << DXIL::kDefaultMaxVectorLength << "node records"; Empty = false; return false; } @@ -12104,17 +12104,16 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::HasLongVecs(const QualType &qt) { - if (qt.isNull()) { +bool hlsl::ContainsVectorLongerThan(const QualType &qt, unsigned length) { + if (qt.isNull()) return false; - } if (IsHLSLVecType(qt)) { - if (GetHLSLVecSize(qt) > 4) + if (GetHLSLVecSize(qt) > length) return true; } else if (qt->isArrayType()) { const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); - return HasLongVecs(arrayType->getElementType()); + return ContainsVectorLongerThan(arrayType->getElementType(), length); } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); const RecordDecl *recordDecl = recordType->getDecl(); @@ -12124,7 +12123,7 @@ bool hlsl::HasLongVecs(const QualType &qt) { RecordDecl::field_iterator end = recordDecl->field_end(); for (; begin != end; begin++) { const FieldDecl *fieldDecl = *begin; - if (HasLongVecs(fieldDecl->getType())) + if (ContainsVectorLongerThan(fieldDecl->getType(), length)) return true; } } @@ -14760,8 +14759,10 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from cbuffers. 
- if (isGlobal && !isStatic && !isGroupShared && HasLongVecs(qt)) { - Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << "cbuffers"; + if (isGlobal && !isStatic && !isGroupShared && + ContainsVectorLongerThan(qt, DXIL::kDefaultMaxVectorLength)) { + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "cbuffers"; result = false; } @@ -15655,9 +15656,9 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (HasLongVecs(Arg->getType())) { + if (ContainsVectorLongerThan(Arg->getType(), DXIL::kDefaultMaxVectorLength)) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << "user-defined struct parameter"; + << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; return true; } return false; @@ -16396,13 +16397,15 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. for (const auto *param : FD->params()) - if (HasLongVecs(param->getType())) + if (ContainsVectorLongerThan(param->getType(), + DXIL::kDefaultMaxVectorLength)) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function parameters"; + << DXIL::kDefaultMaxVectorLength << "entry function parameters"; - if (HasLongVecs(FD->getReturnType())) + if (ContainsVectorLongerThan(FD->getReturnType(), + DXIL::kDefaultMaxVectorLength)) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function return type"; + << DXIL::kDefaultMaxVectorLength << "entry function return type"; DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index ee5ea567ce..adb2352a56 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,14 +521,17 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (HasLongVecs(param->getType())) + if (ContainsVectorLongerThan(param->getType(), + DXIL::kDefaultMaxVectorLength)) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; - if (HasLongVecs(pPatchFnDecl->getReturnType())) + if (ContainsVectorLongerThan(pPatchFnDecl->getReturnType(), 4)) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "patch constant function return type"; } diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index c1c844d4be..11effb645b 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -1440,6 +1440,13 @@ class DxcCompiler : public IDxcCompiler3, Opts.EnablePayloadQualifiers; compiler.getLangOpts().HLSLProfile = compiler.getCodeGenOpts().HLSLProfile = Opts.TargetProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile.c_str()); + if (SM->IsSM69Plus()) + compiler.getLangOpts().MaxHLSLVectorLength = DXIL::kSM69MaxVectorLength; + else + compiler.getLangOpts().MaxHLSLVectorLength = + DXIL::kDefaultMaxVectorLength; // Enable dumping implicit top level decls either if it was specifically // 
requested or if we are not dumping the ast from the command line. That From 76dde0d016eb183d36d97adb14f6058b4118921d Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 23:49:18 -0700 Subject: [PATCH 06/88] Reaname and consolidate longvecs tests Go for consistent test filename formatting. most LLVM tests have dashes, so dashes it is. Remove redundant sm68 test --- ...{longvec_decls.hlsl => longvec-decls.hlsl} | 0 ..._hs.hlsl => invalid-longvec-decls-hs.hlsl} | 0 ..._decls.hlsl => invalid-longvec-decls.hlsl} | 0 ...s_sm68.hlsl => invalid-longvecs-sm68.hlsl} | 0 .../hlsl/types/invalid_longvec_decls_68.hlsl | 108 ------------------ 5 files changed, 108 deletions(-) rename tools/clang/test/CodeGenDXIL/hlsl/types/{longvec_decls.hlsl => longvec-decls.hlsl} (100%) rename tools/clang/test/SemaHLSL/hlsl/types/{invalid_longvec_decls_hs.hlsl => invalid-longvec-decls-hs.hlsl} (100%) rename tools/clang/test/SemaHLSL/hlsl/types/{invalid_longvec_decls.hlsl => invalid-longvec-decls.hlsl} (100%) rename tools/clang/test/SemaHLSL/hlsl/types/{invalid_longvecs_sm68.hlsl => invalid-longvecs-sm68.hlsl} (100%) delete mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl similarity index 100% rename from tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl rename to tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl rename to tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl rename to tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl rename to tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl deleted file mode 100644 index 8aac527c1f..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl +++ /dev/null @@ -1,108 +0,0 @@ -// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_8 -verify %s - -// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } -struct LongVec { - float4 f; - vector vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} -}; - -static vector static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} -static vector static_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - -groupshared vector gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} -groupshared vector gs_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - -export vector lv_param_passthru(vector vec1) { // expected-error {{invalid value, valid 
range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export void lv_param_in_out(in vector vec1, out vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vec2 = vec1; -} - -export void lv_param_inout(inout vector vec1, inout vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector tmp = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vec1 = vec2; - vec2 = tmp; -} - -export void lv_global_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - static_vec = vec; -} - -export vector lv_global_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export void lv_gs_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - gs_vec = vec; -} - -export vector lv_gs_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_param_arr_passthru(vector vec)[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - ret[i] = vec; - return ret; -} - -export void lv_global_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - static_vec_arr[i] = vec[i]; -} - -export vector lv_global_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - ret[i] = static_vec_arr[i]; - return ret; -} - -export void lv_gs_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - gs_vec_arr[i] = vec[i]; -} - -export vector lv_gs_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - ret[i] = gs_vec_arr[i]; - return ret; -} - -export vector lv_splat(TYPE scalar) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = scalar; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_initlist() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = {1, 2, 3, 4, 5, 6}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_initlist_vec(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = {vec, 4.0, 5.0, 6.0}; // 
expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_vec_vec(vector vec1, vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = {vec1, vec2}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_array_cast(TYPE arr[NUM]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = (vector)arr; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_ctor(TYPE s) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - From 765ab1c56eb412afdba517064716b1cccea3ed42 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 23:03:09 -0700 Subject: [PATCH 07/88] Refactor, clarify, and expand testing Expand existing tests to different target and contexts. Add thorough testing for geometry streams and tessellation patches. Add toolong vector test. Verify that vectors that are over the maximum for 6.9 fail. Add subobjects and template classes to tests. These are unfortunately disabled because the code to make them work causes other tests to fail. --- .../CodeGenDXIL/hlsl/types/longvec-decls.hlsl | 305 +++++++++++------- .../hlsl/types/invalid-longvec-decls.hlsl | 164 +++++++--- .../SemaHLSL/hlsl/types/toolong-vectors.hlsl | 116 +++++++ 3 files changed, 414 insertions(+), 171 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl index d6672e7678..8bc7b9e73d 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl @@ -1,64 +1,137 @@ -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=int16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s -check-prefixes=CHECK,F5 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,B7 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s -check-prefixes=CHECK,L9 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=17 %s | FileCheck %s -check-prefixes=CHECK,D17 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=256 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,H256 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=int16_t -DNUM=1024 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,S1024 // A test to verify that declarations of longvecs are permitted in all the accepted places. // Only tests for acceptance, most codegen is ignored for now. 
-// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } +// CHECK: %struct.LongVec = type { <4 x float>, <[[NUM:[0-9]*]] x [[STY:[a-z0-9]*]]> } struct LongVec { float4 f; vector vec; }; +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; // Just some dummies to capture the types and mangles. // CHECK: @"\01?dummy@@3[[MNG:F|M|N|_N|_K|\$f16@]]A" = external addrspace(3) global [[STY]] groupshared TYPE dummy; -// CHECK-DAG: @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" = external addrspace(3) global <7 x [[STY]]> -// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$06@@A" = external addrspace(3) global [10 x <7 x [[STY]]>] -// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec +// Use the first groupshared to establish mangles and sizes +// F5-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:M]]$[[VS:04]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// B7-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_N]]$[[VS:06]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// L9-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_K]]$[[VS:08]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// D17-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:N]]$[[VS:0BB@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// H256-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:\$f16@]]$[[VS:0BAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// S1024-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:F]]$[[VS:0EAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> groupshared vector gs_vec; + +// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$[[VS]]@@A" = external addrspace(3) global [10 x <[[NUM]] x [[STY]]>] groupshared vector gs_vec_arr[10]; +// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec groupshared LongVec gs_vec_rec; +// CHECK-DAG: @"\01?gs_vec_sub@@3ULongVecSub@@A" = external addrspace(3) global %struct.LongVecSub +groupshared LongVecSub gs_vec_sub; +// CHECK-DAG: @"\01?gs_vec_tpl@@3U?$LongVecTpl@$[[VS]]@@A" = external addrspace(3) global %"struct.LongVecTpl<[[NUM]]>" +groupshared LongVecTpl gs_vec_tpl; -// CHECK-DAG: @static_vec = internal global <7 x [[STY]]> -// CHECK-DAG: @static_vec_arr = internal global [10 x <7 x [[STY]]>] zeroinitializer -// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec +// CHECK-DAG: @static_vec = internal global <[[NUM]] x [[STY]]> static vector static_vec; +// CHECK-DAG: @static_vec_arr = internal global [10 x <[[NUM]] x [[STY]]>] zeroinitializer static vector static_vec_arr[10]; +// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec static LongVec static_vec_rec; +// CHECK-DAG: @static_vec_sub = internal global %struct.LongVecSub +static LongVecSub static_vec_sub; +// CHECK-DAG: @static_vec_tpl = internal global %"struct.LongVecTpl<[[NUM]]>" +static LongVecTpl static_vec_tpl; // CHECK: define [[RTY:[a-z0-9]*]] @"\01?getVal@@YA[[MNG]][[MNG]]@Z"([[RTY]] {{.*}}%t) export TYPE getVal(TYPE t) {TYPE ret = dummy; dummy = t; return ret;} -// CHECK: define <7 x [[RTY]]> +// CHECK: define <[[NUM]] x [[RTY]]> // CHECK-LABEL: @"\01?lv_param_passthru -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@V1@@Z"(<7 x [[RTY]]> %vec1) -// CHECK: ret <7 x [[RTY]]> +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@V1@@Z"(<[[NUM]] x [[RTY]]> %vec1) +// CHECK: ret <[[NUM]] x [[RTY]]> export vector lv_param_passthru(vector vec1) { - vector ret = vec1; - return ret; + return vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_arr_passthru +// CHECK-SAME: 
@@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@Y09V1@@Z"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result, [10 x <[[NUM]] x [[STY]]>]* %vec) +// CHECK: ret void +export vector lv_param_arr_passthru(vector vec[10])[10] { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_rec_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_sub_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_sub_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_tpl_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_tpl_passthru(LongVec vec) { + return vec; } // CHECK-LABEL: define void @"\01?lv_param_in_out -// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@AIAV1@@Z"(<7 x [[RTY]]> %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@AIAV1@@Z"(<[[NUM]] x [[RTY]]> %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 // CHECK: ret void export void lv_param_in_out(in vector vec1, out vector vec2) { vec2 = vec1; } +// CHECK-LABEL: define void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_sub@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_sub(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_tpl@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_tpl(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + + // CHECK-LABEL: define void @"\01?lv_param_inout -// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$06@@0@Z"(<7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec1, align 4 -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec2, align 4 -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec1, align 4 -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$[[VS]]@@0@Z"(<[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 // CHECK: ret void export void lv_param_inout(inout vector vec1, inout vector vec2) { vector tmp = vec1; @@ -66,152 +139,138 @@ export void lv_param_inout(inout vector vec1, inout vector vec2 = tmp; } -// CHECK-LABEL: define 
void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) // CHECK: memcpy // CHECK: ret void -export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { - vec2 = vec1; +export void lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; } -// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK-LABEL: define void @"\01?lv_param_inout_sub@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) // CHECK: memcpy // CHECK: ret void -export void lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { +export void lv_param_inout_sub(inout LongVec vec1, inout LongVec vec2) { LongVec tmp = vec1; vec1 = vec2; vec2 = tmp; } -// CHECK-LABEL: define void @"\01?lv_global_assign -// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* @static_vec +// CHECK-LABEL: define void @"\01?lv_param_inout_tpl@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy // CHECK: ret void -export void lv_global_assign(vector vec) { - static_vec = vec; +export void lv_param_inout_tpl(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; } -// CHECK: define <7 x [[RTY]]> -// CHECK-LABEL: @"\01?lv_global_ret -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* @static_vec -// CHECK: ret <7 x [[RTY]]> -export vector lv_global_ret() { - vector ret = static_vec; - return ret; +// CHECK-LABEL: define void @"\01?lv_global_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret void +export void lv_global_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + static_vec = vec; + static_vec_arr = arr; + static_vec_rec = rec; + static_vec_sub = sub; + static_vec_tpl = tpl; } // CHECK-LABEL: define void @"\01?lv_gs_assign -// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" // CHECK: ret void -export void lv_gs_assign(vector vec) { +export void lv_gs_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { gs_vec = vec; + gs_vec_arr = arr; + gs_vec_rec = sub; + gs_vec_tpl = tpl; } -// CHECK: define <7 x [[RTY]]> -// CHECK-LABEL: @"\01?lv_gs_ret -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" -// CHECK: ret <7 x [[RTY]]> -export vector 
lv_gs_ret() { - vector ret = gs_vec; - return ret; -} - -#define DIMS 10 - -// CHECK-LABEL: define void @"\01?lv_param_arr_passthru -// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@V1@@Z"([10 x <7 x [[STY]]>]* noalias sret %agg.result, <7 x [[RTY]]> %vec) -// Arrays are returned in the params -// CHECK: ret void -export vector lv_param_arr_passthru(vector vec)[10] { - vector ret[10]; - for (int i = 0; i < DIMS; i++) - ret[i] = vec; - return ret; -} - -// CHECK-LABEL: define void @"\01?lv_global_arr_assign -// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) -// CHECK: ret void -export void lv_global_arr_assign(vector vec[10]) { - for (int i = 0; i < DIMS; i++) - static_vec_arr[i] = vec[i]; +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_global_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_global_ret() { + return static_vec; } // CHECK-LABEL: define void @"\01?lv_global_arr_ret -// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) -// Arrays are returned in the params +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) // CHECK: ret void export vector lv_global_arr_ret()[10] { - vector ret[10]; - for (int i = 0; i < DIMS; i++) - ret[i] = static_vec_arr[i]; - return ret; -} - -// CHECK-LABEL: define void @"\01?lv_gs_arr_assign -// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) -// ret void -export void lv_gs_arr_assign(vector vec[10]) { - for (int i = 0; i < DIMS; i++) - gs_vec_arr[i] = vec[i]; + return static_vec_arr; } -// CHECK-LABEL: define void @"\01?lv_gs_arr_ret -// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) -export vector lv_gs_arr_ret()[10] { - vector ret[10]; - for (int i = 0; i < DIMS; i++) - ret[i] = gs_vec_arr[i]; - return ret; +// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_global_rec_ret() { + return static_vec_rec; } -// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK-LABEL: define void @"\01?lv_global_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) // CHECK: memcpy -// Aggregates are returned in the params // CHECK: ret void -export LongVec lv_param_rec_passthru(LongVec vec) { - LongVec ret = vec; - return ret; +export LongVecSub lv_global_sub_ret() { + return static_vec_sub; } -// CHECK-LABEL: define void @"\01?lv_global_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK-LABEL: define void @"\01?lv_global_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) // CHECK: memcpy // CHECK: ret void -export void lv_global_rec_assign(LongVec vec) { - static_vec_rec = vec; +export LongVecTpl lv_global_tpl_ret() { + return static_vec_tpl; } -// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_gs_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret 
<[[NUM]] x [[RTY]]> +export vector lv_gs_ret() { + return gs_vec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_gs_arr_ret()[10] { + return gs_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) // CHECK: memcpy -// Aggregates are returned in the params // CHECK: ret void -export LongVec lv_global_rec_ret() { - LongVec ret = static_vec_rec; - return ret; +export LongVec lv_gs_rec_ret() { + return gs_vec_rec; } -// CHECK-LABEL: define void @"\01?lv_gs_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK-LABEL: define void @"\01?lv_gs_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) // CHECK: memcpy // CHECK: ret void -export void lv_gs_rec_assign(LongVec vec) { - gs_vec_rec = vec; +export LongVecSub lv_gs_sub_ret() { + return gs_vec_sub; } -// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK-LABEL: define void @"\01?lv_gs_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) // CHECK: memcpy -// Aggregates are returned in the params // CHECK: ret void -export LongVec lv_gs_rec_ret() { - LongVec ret = gs_vec_rec; - return ret; +export LongVecTpl lv_gs_tpl_ret() { + return gs_vec_tpl; } -// CHECK: define <7 x [[RTY]]> +// CHECK: define <[[NUM]] x [[RTY]]> // CHECK-LABEL: @"\01?lv_splat -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) -// CHECK: ret <7 x [[RTY]]> +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) +// CHECK: ret <[[NUM]] x [[RTY]]> export vector lv_splat(TYPE scalar) { vector ret = scalar; return ret; @@ -244,10 +303,10 @@ export vector lv_vec_vec(vector vec1, vector vec2) { return ret; } -// CHECK: define <7 x [[RTY]]> +// CHECK: define <[[NUM]] x [[RTY]]> // CHECK-LABEL: @"\01?lv_array_cast -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@Y06[[MNG]]@Z"([7 x [[STY]]]* %arr) -// CHECK: ret <7 x [[RTY]]> +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@Y[[VS]][[MNG]]@Z"({{\[}}[[NUM]] x [[STY]]]* %arr) +// CHECK: ret <[[NUM]] x [[RTY]]> export vector lv_array_cast(TYPE arr[NUM]) { vector ret = (vector)arr; return ret; diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index ae52983772..98bcc14342 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -1,86 +1,142 @@ -// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_9 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=LongVec -DNUM=5 -verify %s +// RUiN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s +// RUiN: %dxc -T ps_6_9 -DNUM=1024 -verify %s -struct [raypayload] LongVec { - float4 f : write(closesthit) : read(caller); - vector vec : write(closesthit) : read(caller); -}; +// Add tests for base types and instantiated template classes with longvecs +// Size of the vector shouldn't matter, but using a few different ones just in case. 
+ +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#ifndef TYPE +#define TYPE LongVecTpl +#endif -struct LongVecParm { - float f; - float4 tar2 : SV_Target2; - vector vec; +struct LongVec { + float4 f; + vector vec; }; -vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +struct LongVecSub : LongVec { + int3 is; +}; -vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +template +struct LongVecTpl { + float4 f; + vector vec; +}; -LongVec global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} cbuffer BadBuffy { - vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - LongVec cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} }; tbuffer BadTuffy { - vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - LongVec tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} }; -ConstantBuffer< LongVec > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TextureBuffer< LongVec > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +[shader("pixel")] +vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} + vector vec : V) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + return vec; +} -vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} - vector vec : V, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - LongVecParm parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function 
parameters are not supported}} - parm.f = vec; // expected-warning {{implicit truncation of vector type}} - parm.tar2 = vec; // expected-warning {{implicit truncation of vector type}} - return vec; // expected-warning {{implicit truncation of vector type}} +[shader("vertex")] +TYPE vs_main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} + TYPE parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + parm.f = 0; + return parm; } + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line TYPE e, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + inout PointStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + inout LineStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + inout TriangleStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} + [shader("domain")] [domain("tri")] -void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} - -void PatchConstantFunction(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} - OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void patch_const(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} [shader("hull")] [domain("tri")] [outputtopology("triangle_cw")] [outputcontrolpoints(32)] -[patchconstantfunc("PatchConstantFunction")] -void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} RaytracingAccelerationStructure RTAS; +struct [raypayload] DXRLongVec { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRLongVecSub : DXRLongVec { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRLongVecTpl { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +#define RTTYPE PASTE(DXR,TYPE) + [shader("raygeneration")] void raygen() { - LongVec p = (LongVec)0; + RTTYPE p = (RTTYPE)0; RayDesc ray = (RayDesc)0; TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } + [shader("closesthit")] -void 
closesthit(inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in LongVec attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void closesthit(inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("anyhit")] -void AnyHit( inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in LongVec attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void AnyHit( inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} { } [shader("miss")] -void Miss(inout LongVec payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void Miss(inout RTTYPE payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} @@ -89,12 +145,12 @@ void Miss(inout LongVec payload){ // expected-error{{Vectors of over 4 elements [shader("intersection")] void Intersection() { float hitT = RayTCurrent(); - LongVec attr = (LongVec)0; + RTTYPE attr = (RTTYPE)0; bool bReported = ReportHit(hitT, 0, attr); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("callable")] -void callable1(inout LongVec p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void callable1(inout RTTYPE p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } @@ -106,27 +162,39 @@ void Amp() { DispatchMesh(1,1,1,as_pld); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } -struct LongVecRec { +struct NodeLongVec { uint3 grid : SV_DispatchGrid; - vector vec; + vector vec; +}; + +struct NodeLongVecSub : NodeLongVec { + int3 is; }; +template +struct NodeLongVecTpl { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +#define NTYPE PASTE(Node,TYPE) + [Shader("node")] [NodeLaunch("broadcasting")] [NumThreads(8,1,1)] [NodeMaxDispatchGrid(8,1,1)] -void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are not supported}} - NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are 
not supported}} + NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} { - ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} - GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} } [Shader("node")] [NodeLaunch("coalescing")] [NumThreads(8,1,1)] -void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} [Shader("node")] [NodeLaunch("thread")] -void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl new file mode 100644 index 0000000000..c1da348695 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=float -DNUM=1025 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=float -DNUM=1025 -verify %s + +// A test to verify that declarations of longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl2 { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared LongVecTpl gs_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl<1025>' requested here}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static LongVecTpl2 static_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl2<1025>' requested here}} + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_passthru(vector vec1) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 
1 and 1024 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec1 = vec2; + vec2 = tmp; +} + +export void lv_global_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + static_vec = vec; +} + +export vector lv_global_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_gs_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + gs_vec = vec; +} + +export vector lv_gs_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +#define DIMS 10 + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_arr_passthru(vector vec)[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = vec; + return ret; +} + +export void lv_global_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + static_vec_arr[i] = vec[i]; +} + +export vector lv_global_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = static_vec_arr[i]; + return ret; +} + +export void lv_gs_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + gs_vec_arr[i] = vec[i]; +} + +export vector lv_gs_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +export LongVec lv_param_rec_passthru(LongVec vec) { + LongVec ret = vec; + return ret; +} + +export vector lv_splat(TYPE scalar) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = scalar; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export vector lv_array_cast(TYPE arr[NUM]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = (vector)arr; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + From fb6538e844a2993de10100c77769cbf0f2e862d2 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 3 Mar 2025 13:00:18 -0700 Subject: [PATCH 08/88] clang-format --- tools/clang/include/clang/AST/HlslTypes.h | 11 +++++----- tools/clang/lib/AST/ASTContextHLSL.cpp | 9 ++++---- tools/clang/lib/AST/HlslTypes.cpp | 4 +--- tools/clang/lib/Sema/SemaHLSL.cpp | 25 ++++++++++++----------- 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 9aeb97d3ee..5cd14cbe8a 
100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -348,10 +348,10 @@ void AddHLSLNodeOutputRecordTemplate( _Outptr_ clang::ClassTemplateDecl **outputRecordTemplateDecl, bool isCompleteType = true); -clang::CXXRecordDecl *DeclareRecordTypeWithHandle(clang::ASTContext &context, - llvm::StringRef name, - bool isCompleteType = true, - clang::InheritableAttr *Attr = nullptr); +clang::CXXRecordDecl * +DeclareRecordTypeWithHandle(clang::ASTContext &context, llvm::StringRef name, + bool isCompleteType = true, + clang::InheritableAttr *Attr = nullptr); void AddRaytracingConstants(clang::ASTContext &context); void AddSamplerFeedbackConstants(clang::ASTContext &context); @@ -382,8 +382,7 @@ clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, - llvm::StringRef templateParamName, - clang::InheritableAttr *Attr = nullptr); + llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index e71f37b663..5b10540e7a 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1111,7 +1111,8 @@ CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, StringRef templateParamName, InheritableAttr *Attr) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, TagTypeKind::TTK_Class); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, + TagTypeKind::TTK_Class); typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); typeDeclBuilder.startDefinition(); @@ -1140,9 +1141,9 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
- typeDeclBuilder.getRecordDecl()->addAttr( - HLSLResourceAttr::CreateImplicit(context, (unsigned)DXIL::ResourceKind::CBuffer, - (unsigned)DXIL::ResourceClass::CBuffer)); + typeDeclBuilder.getRecordDecl()->addAttr(HLSLResourceAttr::CreateImplicit( + context, (unsigned)DXIL::ResourceKind::CBuffer, + (unsigned)DXIL::ResourceClass::CBuffer)); typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 5f7e93fbee..4dd44c02d7 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -475,9 +475,7 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { return elemTy; } - -template -static AttrType *getAttr(clang::QualType type) { +template static AttrType *getAttr(clang::QualType type) { type = type.getCanonicalType(); if (const RecordType *RT = type->getAs()) { if (const auto *Spec = diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fe3390a89e..ff0624045f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -363,7 +363,7 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) -#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props)&BPROP_PATCH) != 0) #define IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) @@ -3546,11 +3546,11 @@ class HLSLExternalSource : public ExternalSemaSource { InheritableAttr *Attr = nullptr; if (IS_BASIC_STREAM(kind)) - Attr = - HLSLStreamOutputAttr::CreateImplicit(*m_context, - kind - AR_OBJECT_POINTSTREAM + 1); + Attr = HLSLStreamOutputAttr::CreateImplicit( + *m_context, kind - AR_OBJECT_POINTSTREAM + 1); else if (IS_BASIC_PATCH(kind)) - Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, kind == AR_OBJECT_INPUTPATCH); + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, + kind == AR_OBJECT_INPUTPATCH); else { DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; @@ -3746,9 +3746,9 @@ class HLSLExternalSource : public ExternalSemaSource { } #endif else if (templateArgCount == 0) { - recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, - /*isCompleteType*/ false, - Attr); + recordDecl = + DeclareRecordTypeWithHandle(*m_context, typeName, + /*isCompleteType*/ false, Attr); } else { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); @@ -5237,8 +5237,9 @@ class HLSLExternalSource : public ExternalSemaSource { } // Allow object type for Constant/TextureBuffer. 
HLSLResourceAttr *ResAttr = - Template->getTemplatedDecl()->getAttr(); - if (ResAttr && ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { + Template->getTemplatedDecl()->getAttr(); + if (ResAttr && + ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); @@ -5353,7 +5354,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "tessellation patches"; + << DXIL::kDefaultMaxVectorLength << "tessellation patches"; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5367,7 +5368,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "geometry streams"; + << DXIL::kDefaultMaxVectorLength << "geometry streams"; return true; } } From 19633b2b0ec8187cda5a4163c577a9dcec6e29d6 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 25 Feb 2025 14:43:14 -0700 Subject: [PATCH 09/88] Handle subclasses and templates of longvector structs Use RequireCompleteType to force specialization of templates encountered in global and other scopes where finding long vectors is necessary where possible. This populates the definitiondata which contains the base class chain needed to detect when a base class has disqualifying long vectors. It was also needed to detect when dependent types in a template class result in long vectors. Work graph node types didn't check their base classes for failures. This affects base classes with longvectors that have sub classes used for node objects which should fail for having long vector members. 
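(An illustrative HLSL sketch of the cases described above, added for clarity and not part of the patch itself: the record only reaches the long vector through a base class or through a template argument, so the check has to force specialization and walk base classes. The type and entry-point names below are hypothetical, and the exact diagnostic text and target profile follow the long-vector tests elsewhere in this series.)

struct LongVecBase {
  vector<float, 5> vec;               // long vector declared in the base class
};

struct LongVecSub : LongVecBase {};   // no direct long vector member; inherits one

template <int N>
struct LongVecTpl {
  vector<float, N> vec;               // only becomes a long vector once N > 4 is instantiated
};

[Shader("node")]
[NodeLaunch("thread")]
void node_sub(ThreadNodeInputRecord<LongVecSub> input) {}     // should be diagnosed like a direct long-vector member

[Shader("node")]
[NodeLaunch("thread")]
void node_tpl(ThreadNodeInputRecord<LongVecTpl<8> > input) {} // requires forcing the specialization described above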
Respond to feedback about iterating through fields in clunky manner which got left out of the last reviewer feedback response --- tools/clang/include/clang/Sema/SemaHLSL.h | 3 +- tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 78 ++++++++++--------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 5 +- .../hlsl/types/invalid-longvec-decls.hlsl | 4 +- 5 files changed, 50 insertions(+), 42 deletions(-) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 786f82933d..d31e32acbb 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,8 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool ContainsVectorLongerThan(const clang::QualType &qt, unsigned length); +bool ContainsVectorLongerThan(clang::Sema *S, clang::QualType qt, + unsigned length); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 07234554e2..32ca88c27a 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (hlsl::ContainsVectorLongerThan(Payload->getType(), + if (hlsl::ContainsVectorLongerThan(&S, Payload->getType(), DXIL::kDefaultMaxVectorLength)) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "payload parameters"; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ff0624045f..ac5ab27835 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5254,23 +5254,13 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(m_sema, argType, + DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; return true; } - if (auto *TST = dyn_cast(argType)) { - // This is a bit of a special case we need to handle. Because the - // buffer types don't use their template parameter in a way that would - // force instantiation, we need to force specialization here. 
- GetOrCreateTemplateSpecialization( - *m_context, *m_sema, - cast( - TST->getTemplateName().getAsTemplateDecl()), - llvm::ArrayRef(TST->getArgs(), - TST->getNumArgs())); - } if (const RecordType *recordType = argType->getAs()) { if (!recordType->getDecl()->isCompleteDefinition()) { m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) @@ -5351,7 +5341,8 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Tessellation patch requires type template arg 0"); QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(m_sema, argType, + DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "tessellation patches"; @@ -5365,7 +5356,8 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Geometry stream requires type template arg 0"); QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(m_sema, argType, + DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "geometry streams"; @@ -11662,14 +11654,15 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, bool ErrorFound = false; const RecordDecl *RD = ArgTy->getAs()->getDecl(); // Check the fields of the RecordDecl - RecordDecl::field_iterator begin = RD->field_begin(); - RecordDecl::field_iterator end = RD->field_end(); - while (begin != end) { - const FieldDecl *FD = *begin; + for (auto *FD : RD->fields()) ErrorFound |= DiagnoseNodeStructArgument(self, ArgLoc, FD->getType(), Empty, FD); - begin++; - } + if (RD->isCompleteDefinition()) + if (auto *Child = dyn_cast(RD)) + // Walk up the inheritance chain and check base class fields + for (auto &B : Child->bases()) + ErrorFound |= + DiagnoseNodeStructArgument(self, ArgLoc, B.getType(), Empty); return ErrorFound; } default: @@ -12105,8 +12098,8 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::ContainsVectorLongerThan(const QualType &qt, unsigned length) { - if (qt.isNull()) +bool hlsl::ContainsVectorLongerThan(Sema *S, QualType qt, unsigned length) { + if (qt.isNull() || qt->isDependentType()) return false; if (IsHLSLVecType(qt)) { @@ -12114,19 +12107,30 @@ bool hlsl::ContainsVectorLongerThan(const QualType &qt, unsigned length) { return true; } else if (qt->isArrayType()) { const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); - return ContainsVectorLongerThan(arrayType->getElementType(), length); + return ContainsVectorLongerThan(S, arrayType->getElementType(), length); } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); - const RecordDecl *recordDecl = recordType->getDecl(); + RecordDecl *recordDecl = recordType->getDecl(); if (recordDecl->isInvalidDecl()) return false; - RecordDecl::field_iterator begin = recordDecl->field_begin(); - RecordDecl::field_iterator end = recordDecl->field_end(); - for (; begin != end; begin++) { - const FieldDecl *fieldDecl = *begin; - if (ContainsVectorLongerThan(fieldDecl->getType(), length)) - return true; + if (ClassTemplateSpecializationDecl *templateSpecializationDecl = + dyn_cast(recordDecl)) { + if (templateSpecializationDecl->getSpecializationKind() == + 
TSK_Undeclared) { + S->RequireCompleteType(recordDecl->getLocation(), qt, + diag::err_typecheck_decl_incomplete_type); + } } + if (!recordDecl->isCompleteDefinition()) + return false; + for (FieldDecl *FD : recordDecl->fields()) + if (ContainsVectorLongerThan(S, FD->getType(), length)) + return true; + if (auto *Child = dyn_cast(recordDecl)) + // Walk up the inheritance chain and check all fields on base classes + for (auto &B : Child->bases()) + if (ContainsVectorLongerThan(S, B.getType(), length)) + return true; } return false; } @@ -14759,9 +14763,9 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } - // Disallow long vecs from cbuffers. + // Disallow long vecs from $Global cbuffers. if (isGlobal && !isStatic && !isGroupShared && - ContainsVectorLongerThan(qt, DXIL::kDefaultMaxVectorLength)) { + ContainsVectorLongerThan(this, qt, DXIL::kDefaultMaxVectorLength)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; result = false; @@ -15657,7 +15661,8 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsVectorLongerThan(Arg->getType(), DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(S, Arg->getType(), + DXIL::kDefaultMaxVectorLength)) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; return true; @@ -16397,13 +16402,14 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. - for (const auto *param : FD->params()) - if (ContainsVectorLongerThan(param->getType(), + for (const auto *param : FD->params()) { + if (ContainsVectorLongerThan(&S, param->getType(), DXIL::kDefaultMaxVectorLength)) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function parameters"; + } - if (ContainsVectorLongerThan(FD->getReturnType(), + if (ContainsVectorLongerThan(&S, FD->getReturnType(), DXIL::kDefaultMaxVectorLength)) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function return type"; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index adb2352a56..6645c4c3d2 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,14 +521,15 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (ContainsVectorLongerThan(param->getType(), + if (ContainsVectorLongerThan(self, param->getType(), DXIL::kDefaultMaxVectorLength)) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; - if (ContainsVectorLongerThan(pPatchFnDecl->getReturnType(), 4)) + if (ContainsVectorLongerThan(self, pPatchFnDecl->getReturnType(), + DXIL::kDefaultMaxVectorLength)) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 98bcc14342..2d0f800121 100644 --- 
a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -1,6 +1,6 @@ // RUN: %dxc -T ps_6_9 -DTYPE=LongVec -DNUM=5 -verify %s -// RUiN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s -// RUiN: %dxc -T ps_6_9 -DNUM=1024 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s +// RUN: %dxc -T ps_6_9 -DNUM=1024 -verify %s // Add tests for base types and instantiated template classes with longvecs // Size of the vector shouldn't matter, but using a few different ones just in case. From 466bb1498f9edd8ea7c8e265185ae6c679aee288 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 5 Mar 2025 20:50:11 +0000 Subject: [PATCH 10/88] chore: autopublish 2025-03-05T20:50:11Z --- tools/clang/lib/Sema/SemaHLSL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ac5ab27835..9b1537d03a 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -363,7 +363,7 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) -#define IS_BPROP_PATCH(_Props) (((_Props)&BPROP_PATCH) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) #define IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) From 66bb77262fff4f660b065da8dd30fc3f342d4880 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 5 Mar 2025 19:11:54 -0700 Subject: [PATCH 11/88] Identify matrices and vectors by attributes I guess it was about time. Should simplify some things later as well as at present and it was too easy to not do. Specifically, I was going to need to add another string check to the template instantiation code to identify longvectors. This is cleaner. Incidentally convert another feedback texture string check to use attribs. Incidentally resort the recently-added attribs to not break up the node shader attribs. --- tools/clang/include/clang/Basic/Attr.td | 55 ++++++++------ tools/clang/lib/AST/ASTContextHLSL.cpp | 6 ++ tools/clang/lib/AST/HlslTypes.cpp | 76 ++++++++----------- .../hlsl/types/matrix/matrix-ast.hlsl | 1 + .../hlsl/types/vector/vector-ast.hlsl | 1 + 5 files changed, 74 insertions(+), 65 deletions(-) diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index e344e7b851..29430e6d4c 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -939,6 +939,39 @@ def HLSLCXXOverload : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLVector : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLMatrix : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLTessPatch : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [BoolArgument<"IsInput">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLStreamOutput : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [UnsignedArgument<"Vertices">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLResource : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + def HLSLNodeLaunch : InheritableAttr { let Spellings = [CXX11<"", "nodelaunch", 2017>]; let Args = [StringArgument<"LaunchType">]; // one of broadcasting, coalescing, thread @@ -992,28 +1025,6 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { let Documentation = [Undocumented]; } - -def HLSLTessPatch : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [BoolArgument<"IsInput">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - -def HLSLStreamOutput : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"Vertices">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - -def HLSLResource : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - def HLSLNodeObject : InheritableAttr { let Spellings = []; // No spellings! let Subjects = SubjectList<[CXXRecord]>; diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 5b10540e7a..9bacfc8b42 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -329,6 +329,9 @@ void hlsl::AddHLSLMatrixTemplate(ASTContext &context, typeDeclBuilder.addField("h", vectorArrayType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLMatrixAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to rowcount-1, and returns // a vector of colcount elements. const unsigned int templateDepth = 0; @@ -385,6 +388,9 @@ void hlsl::AddHLSLVectorTemplate(ASTContext &context, // Add an 'h' field to hold the handle. typeDeclBuilder.addField("h", vectorType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLVectorAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to colcount-1, and returns // a scalar. 
diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 4dd44c02d7..e9c443b9d7 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -53,44 +53,44 @@ ConvertHLSLVecMatTypeToExtVectorType(const clang::ASTContext &context, return nullptr; } +template static AttrType *getAttr(clang::QualType type) { + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr(); + } + return nullptr; +} + bool IsHLSLVecMatType(clang::QualType type) { - const Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } else if (templateDecl->getName() == "matrix") { - return true; - } - } + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr() || + Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr() || Decl->getAttr(); } return false; } bool IsHLSLMatType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "matrix") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } bool IsHLSLVecType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } @@ -475,20 +475,6 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { return elemTy; } -template static AttrType *getAttr(clang::QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr(); - } - return nullptr; -} - // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { type = type.getCanonicalType(); @@ -812,7 +798,11 @@ QualType GetHLSLResourceResultType(QualType type) { if (const ClassTemplateSpecializationDecl *templateDecl = dyn_cast(RD)) { - if (RD->getName().startswith("FeedbackTexture")) { + const HLSLResourceAttr *Attr = getAttr(type); + if (Attr && (Attr->getResKind() == + (unsigned)DXIL::ResourceKind::FeedbackTexture2D || + Attr->getResKind() == + (unsigned)DXIL::ResourceKind::FeedbackTexture2DArray)) { // Feedback textures are write-only and the data is opaque, // so there is no result type per se. 
return {}; diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl index 33086852ab..5443ada0c9 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl @@ -15,6 +15,7 @@ // ext_vector array. // CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class matrix definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLMatrixAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element [row_count] __attribute__((ext_vector_type(col_count)))' diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl index 0ad236a4b2..12859b7eda 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl @@ -12,6 +12,7 @@ // Verify the class, final attribute and ext_vector field decl. // CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class vector definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLVectorAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element __attribute__((ext_vector_type(element_count)))' // Verify operator overloads for const vector subscript operators. From 20c2609253a50817de2d1d1884ef414f80fdd592 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 4 Mar 2025 11:45:49 -0700 Subject: [PATCH 12/88] Use constant vector limit value for cached types Vector types can be cached in a 2D array that has a column for lengths 1-4. This uses the added constant to indicate the length and for the checks that confirm it isn't exceeded. --- tools/clang/lib/Sema/SemaHLSL.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 9b1537d03a..ff682ef501 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -2860,8 +2860,9 @@ class HLSLExternalSource : public ExternalSemaSource { TypedefDecl *m_matrixShorthandTypes[HLSLScalarTypeCount][4][4]; // Vector types already built. - QualType m_vectorTypes[HLSLScalarTypeCount][4]; - TypedefDecl *m_vectorTypedefs[HLSLScalarTypeCount][4]; + QualType m_vectorTypes[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; + TypedefDecl + *m_vectorTypedefs[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; // BuiltinType for each scalar type. 
QualType m_baseTypes[HLSLScalarTypeCount]; @@ -3840,7 +3841,7 @@ class HLSLExternalSource : public ExternalSemaSource { clang::TypedefDecl *LookupVectorShorthandType(HLSLScalarType scalarType, UINT colCount) { DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown && - colCount <= 4); + colCount <= DXIL::kDefaultMaxVectorLength); TypedefDecl *qts = m_vectorTypedefs[scalarType][colCount - 1]; if (qts == nullptr) { QualType type = LookupVectorType(scalarType, colCount); @@ -3948,7 +3949,7 @@ class HLSLExternalSource : public ExternalSemaSource { QualType LookupVectorType(HLSLScalarType scalarType, unsigned int colCount) { QualType qt; - if (colCount < 4) + if (colCount < DXIL::kDefaultMaxVectorLength) qt = m_vectorTypes[scalarType][colCount - 1]; if (qt.isNull()) { if (m_scalarTypes[scalarType].isNull()) { @@ -3957,7 +3958,7 @@ class HLSLExternalSource : public ExternalSemaSource { qt = GetOrCreateVectorSpecialization(*m_context, m_sema, m_vectorTemplateDecl, m_scalarTypes[scalarType], colCount); - if (colCount < 4) + if (colCount < DXIL::kDefaultMaxVectorLength) m_vectorTypes[scalarType][colCount - 1] = qt; } return qt; From 1b3ad427e556c9d7e8086f7dd3971d2c1e070f19 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 22:46:38 -0700 Subject: [PATCH 13/88] Use DefinitionData bits to determine long vector presence By setting the bit when the vector template is instantiated and then propagating it when members, be they standard members or base classes, are added, the bit will be set correctly for any struct or struct-like type. For arrays, the arrays are peeled away in a utility function to get at the elements. Decided to separate the check for completeness from the check for long vectors. Even though the latter almost always requires the former, they are separate concepts and embedding the first in the second would be unexpected. --- tools/clang/include/clang/AST/DeclCXX.h | 11 +++++----- tools/clang/include/clang/Sema/SemaHLSL.h | 3 +- tools/clang/lib/AST/DeclCXX.cpp | 60 ++++++------ tools/clang/lib/Sema/SemaDXR.cpp | 3 +- tools/clang/lib/Sema/SemaHLSL.cpp | 97 ++++++++----------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 6 +- .../lib/Sema/SemaTemplateInstantiate.cpp | 12 +++ tools/clang/test/SemaHLSL/const-default.hlsl | 4 + .../clang/test/SemaHLSL/incomplete-type.hlsl | 1 + 9 files changed, 105 insertions(+), 92 deletions(-) diff --git a/tools/clang/include/clang/AST/DeclCXX.h b/tools/clang/include/clang/AST/DeclCXX.h index 3b07576545..36e0f99c82 100644 --- a/tools/clang/include/clang/AST/DeclCXX.h +++ b/tools/clang/include/clang/AST/DeclCXX.h @@ -465,6 +465,10 @@ class CXXRecordDecl : public RecordDecl { /// \brief Whether we are currently parsing base specifiers. bool IsParsingBaseSpecifiers : 1; + /// \brief Whether this class contains at least one member or base + /// class containing an HLSL vector longer than 4 elements. + bool HasHLSLLongVector : 1; + /// \brief The number of base class specifiers in Bases. unsigned NumBases; @@ -1018,6 +1022,13 @@ class CXXRecordDecl : public RecordDecl { return data().NeedOverloadResolutionForDestructor; } + // HLSL Change add HLSL Long vector bit. + /// \brief Determine whether this class contains an HLSL long vector + /// of over 4 elements. + bool hasHLSLLongVector() { return data().HasHLSLLongVector; } + /// \brief Set that this class contains an HLSL long vector of over 4 elements + bool setHasHLSLLongVector() { return data().HasHLSLLongVector = true; } + /// \brief Determine whether this class describes a lambda function object. 
bool isLambda() const { // An update record can't turn a non-lambda into a lambda. diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index d31e32acbb..7e7400d390 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,8 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool ContainsVectorLongerThan(clang::Sema *S, clang::QualType qt, - unsigned length); +bool ContainsLongVector(clang::QualType qt); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index 9ef771b932..5f8c186919 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -48,34 +48,31 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const { } CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) - : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), - Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), - Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), - HasPrivateFields(false), HasProtectedFields(false), HasPublicFields(false), - HasMutableFields(false), HasVariantMembers(false), HasOnlyCMembers(true), - HasInClassInitializer(false), HasUninitializedReferenceMember(false), - NeedOverloadResolutionForMoveConstructor(false), - NeedOverloadResolutionForMoveAssignment(false), - NeedOverloadResolutionForDestructor(false), - DefaultedMoveConstructorIsDeleted(false), - DefaultedMoveAssignmentIsDeleted(false), - DefaultedDestructorIsDeleted(false), - HasTrivialSpecialMembers(SMF_All), - DeclaredNonTrivialSpecialMembers(0), - HasIrrelevantDestructor(true), - HasConstexprNonCopyMoveConstructor(false), - DefaultedDefaultConstructorIsConstexpr(true), - HasConstexprDefaultConstructor(false), - HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), - UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), - ImplicitCopyConstructorHasConstParam(true), - ImplicitCopyAssignmentHasConstParam(true), - HasDeclaredCopyConstructorWithConstParam(false), - HasDeclaredCopyAssignmentWithConstParam(false), - IsLambda(false), IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), - Bases(), VBases(), - Definition(D), FirstFriend() { -} + : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), + Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), + Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), + HasPrivateFields(false), HasProtectedFields(false), + HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false), + HasOnlyCMembers(true), HasInClassInitializer(false), + HasUninitializedReferenceMember(false), + NeedOverloadResolutionForMoveConstructor(false), + NeedOverloadResolutionForMoveAssignment(false), + NeedOverloadResolutionForDestructor(false), + DefaultedMoveConstructorIsDeleted(false), + DefaultedMoveAssignmentIsDeleted(false), + DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All), + DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true), + HasConstexprNonCopyMoveConstructor(false), + DefaultedDefaultConstructorIsConstexpr(true), + HasConstexprDefaultConstructor(false), + HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), + UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), + 
ImplicitCopyConstructorHasConstParam(true), + ImplicitCopyAssignmentHasConstParam(true), + HasDeclaredCopyConstructorWithConstParam(false), + HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), + IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), + NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return Bases.get(Definition->getASTContext().getExternalSource()); @@ -204,6 +201,10 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; + // Propagate presence of long vector to child classes. + if (BaseClassDecl->hasHLSLLongVector()) + data().HasHLSLLongVector = true; + // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) data().HasNonLiteralTypeFieldsOrBases = true; @@ -385,6 +386,9 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } + + if (Subobj->hasHLSLLongVector()) + data().HasHLSLLongVector = true; } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 32ca88c27a..d71dc2be4c 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,8 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (hlsl::ContainsVectorLongerThan(&S, Payload->getType(), - DXIL::kDefaultMaxVectorLength)) { + if (ContainsLongVector(Payload->getType())) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "payload parameters"; return; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ff682ef501..fc6e7004d4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -942,6 +942,11 @@ GetOrCreateVectorSpecialization(ASTContext &context, Sema *sema, "otherwise vector handle cannot be looked up"); #endif + // I don't think this is necessary. 
+ CXXRecordDecl *Decl = vectorSpecializationType->getAsCXXRecordDecl(); + if (GetHLSLVecSize(vectorSpecializationType) > DXIL::kDefaultMaxVectorLength) + Decl->setHasHLSLLongVector(); + return vectorSpecializationType; } @@ -5255,20 +5260,14 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (ContainsVectorLongerThan(m_sema, argType, - DXIL::kDefaultMaxVectorLength)) { + m_sema->RequireCompleteType(argSrcLoc, argType, + diag::err_typecheck_decl_incomplete_type); + + if (ContainsLongVector(argType)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; return true; } - - if (const RecordType *recordType = argType->getAs()) { - if (!recordType->getDecl()->isCompleteDefinition()) { - m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) - << argType; - return true; - } - } } return false; @@ -5341,9 +5340,10 @@ class HLSLExternalSource : public ExternalSemaSource { const TemplateArgument &arg = argLoc.getArgument(); DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Tessellation patch requires type template arg 0"); - QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(m_sema, argType, - DXIL::kDefaultMaxVectorLength)) { + + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "tessellation patches"; @@ -5356,9 +5356,9 @@ class HLSLExternalSource : public ExternalSemaSource { const TemplateArgument &arg = argLoc.getArgument(); DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Geometry stream requires type template arg 0"); - QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(m_sema, argType, - DXIL::kDefaultMaxVectorLength)) { + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "geometry streams"; @@ -12099,39 +12099,17 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::ContainsVectorLongerThan(Sema *S, QualType qt, unsigned length) { +bool hlsl::ContainsLongVector(QualType qt) { if (qt.isNull() || qt->isDependentType()) return false; - if (IsHLSLVecType(qt)) { - if (GetHLSLVecSize(qt) > length) - return true; - } else if (qt->isArrayType()) { - const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); - return ContainsVectorLongerThan(S, arrayType->getElementType(), length); - } else if (qt->isStructureOrClassType()) { - const RecordType *recordType = qt->getAs(); - RecordDecl *recordDecl = recordType->getDecl(); - if (recordDecl->isInvalidDecl()) - return false; - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast(recordDecl)) { - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) { - S->RequireCompleteType(recordDecl->getLocation(), qt, - diag::err_typecheck_decl_incomplete_type); - } - } - if (!recordDecl->isCompleteDefinition()) + while (const ArrayType *Arr = qt->getAsArrayTypeUnsafe()) + qt = Arr->getElementType(); + + if (CXXRecordDecl *Decl = qt->getAsCXXRecordDecl()) { + if (!Decl->isCompleteDefinition()) return false; - for (FieldDecl *FD : recordDecl->fields()) - if (ContainsVectorLongerThan(S, 
FD->getType(), length)) - return true; - if (auto *Child = dyn_cast(recordDecl)) - // Walk up the inheritance chain and check all fields on base classes - for (auto &B : Child->bases()) - if (ContainsVectorLongerThan(S, B.getType(), length)) - return true; + return Decl->hasHLSLLongVector(); } return false; } @@ -14765,11 +14743,21 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from $Global cbuffers. - if (isGlobal && !isStatic && !isGroupShared && - ContainsVectorLongerThan(this, qt, DXIL::kDefaultMaxVectorLength)) { - Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "cbuffers"; - result = false; + if (isGlobal && !isStatic && !isGroupShared) { + if (qt->isStructureOrClassType()) { + if (ClassTemplateSpecializationDecl *templateSpecializationDecl = + dyn_cast( + qt->getAsCXXRecordDecl())) + if (templateSpecializationDecl->getSpecializationKind() == + TSK_Undeclared) + RequireCompleteType(D.getLocStart(), qt, + diag::err_typecheck_decl_incomplete_type); + } + if (ContainsLongVector(qt)) { + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "cbuffers"; + result = false; + } } // SPIRV change starts @@ -15662,8 +15650,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsVectorLongerThan(S, Arg->getType(), - DXIL::kDefaultMaxVectorLength)) { + if (ContainsLongVector(Arg->getType())) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; return true; @@ -16404,14 +16391,12 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. 
for (const auto *param : FD->params()) { - if (ContainsVectorLongerThan(&S, param->getType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(param->getType())) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function parameters"; } - if (ContainsVectorLongerThan(&S, FD->getReturnType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(FD->getReturnType())) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function return type"; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 6645c4c3d2..2275c48114 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,15 +521,13 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (ContainsVectorLongerThan(self, param->getType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(param->getType())) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; - if (ContainsVectorLongerThan(self, pPatchFnDecl->getReturnType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(pPatchFnDecl->getReturnType())) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength diff --git a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index a6ae05faa5..1eacedbb0b 100644 --- a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2139,6 +2139,18 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, SourceLocation(), SourceLocation(), nullptr); CheckCompletedCXXClass(Instantiation); + // HLSL Change Begin - set longvec bit for vectors of over 4 elements + ClassTemplateSpecializationDecl *Spec = + dyn_cast(Instantiation); + if (Spec && Spec->hasAttr()) { + const TemplateArgumentList &argList = Spec->getTemplateArgs(); + const TemplateArgument &arg1 = argList[1]; + llvm::APSInt vecSize = arg1.getAsIntegral(); + if (vecSize.getLimitedValue() > hlsl::DXIL::kDefaultMaxVectorLength) + Instantiation->setHasHLSLLongVector(); + } + // HLSL Change End - set longvec bit for vectors of over 4 elements + // Default arguments are parsed, if not instantiated. We can go instantiate // default arg exprs for default constructors if necessary now. 
ActOnFinishCXXMemberDefaultArgs(Instantiation); diff --git a/tools/clang/test/SemaHLSL/const-default.hlsl b/tools/clang/test/SemaHLSL/const-default.hlsl index 2ebb6fe52e..6b5e43e0e9 100644 --- a/tools/clang/test/SemaHLSL/const-default.hlsl +++ b/tools/clang/test/SemaHLSL/const-default.hlsl @@ -33,7 +33,11 @@ class MyClass { ConstantBuffer g_const_buffer2; TextureBuffer g_texture_buffer2; +// expected-note@+2 {{forward declaration of 'FWDDeclStruct'}} +// expected-note@+1 {{forward declaration of 'FWDDeclStruct'}} struct FWDDeclStruct; +// expected-note@+2 {{forward declaration of 'FWDDeclClass'}} +// expected-note@+1 {{forward declaration of 'FWDDeclClass'}} class FWDDeclClass; // Ensure forward declared struct/class fails as expected diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index 8869b80400..250171ad05 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -3,6 +3,7 @@ // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. +// expected-note@+5 {{forward declaration of 'S'}} // expected-note@+4 {{forward declaration of 'S'}} // expected-note@+3 {{forward declaration of 'S'}} // expected-note@+2 {{forward declaration of 'S'}} From d3fec833e31f2b375279c5ca48ed7655ee685272 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 5 Mar 2025 19:17:41 -0700 Subject: [PATCH 14/88] Test for incomplete types in a number of builtin template-like objects Output Streams, Tessellation patches, and global variables should be complete when receiving other correctness checks. If they cannot be made complete, they should produce an error. This was omitted for various of these including non-template globals, which was fine, but it meant that redundant errors were produced for templates, but not standard globals likely just because that was what was tested. This removes that distinction and adds testing for all of the above to the existing incomplete-type.hlsl test. --- tools/clang/lib/Sema/SemaHLSL.cpp | 45 +++++----- ...ent_type_for_node_object_template_arg.hlsl | 3 - .../clang/test/SemaHLSL/incomplete-type.hlsl | 87 +++++++++++++++++-- 3 files changed, 101 insertions(+), 34 deletions(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fc6e7004d4..8abad632a2 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5296,22 +5296,13 @@ class HLSLExternalSource : public ExternalSemaSource { // template instantiation. if (ArgTy->isDependentType()) return false; - if (auto *recordType = ArgTy->getAs()) { - if (CXXRecordDecl *cxxRecordDecl = - dyn_cast(recordType->getDecl())) { - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast(cxxRecordDecl)) { - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) { - // Make sure specialization is done before IsTypeNumeric. - // If not, ArgTy might be treat as empty struct. - m_sema->RequireCompleteType( - ArgLoc.getLocation(), ArgTy, - diag::err_typecheck_decl_incomplete_type); - } - } - } - } + // Make sure specialization is done before IsTypeNumeric. + // If not, ArgTy might be treat as empty struct. 
+ m_sema->RequireCompleteType(ArgLoc.getLocation(), ArgTy, + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = ArgTy->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; // The node record type must be compound - error if it is not. if (GetTypeObjectKind(ArgTy) != AR_TOBJ_COMPOUND) { m_sema->Diag(ArgLoc.getLocation(), diag::err_hlsl_node_record_type) @@ -5343,6 +5334,9 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -5358,6 +5352,9 @@ class HLSLExternalSource : public ExternalSemaSource { "Geometry stream requires type template arg 0"); m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -14744,15 +14741,13 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, // Disallow long vecs from $Global cbuffers. if (isGlobal && !isStatic && !isGroupShared) { - if (qt->isStructureOrClassType()) { - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast( - qt->getAsCXXRecordDecl())) - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) - RequireCompleteType(D.getLocStart(), qt, - diag::err_typecheck_decl_incomplete_type); - } + // Suppress actual emitting of errors for incompletable types here + // They are redundant to those produced in ActOnUninitializedDecl. + struct SilentDiagnoser : public TypeDiagnoser { + SilentDiagnoser() : TypeDiagnoser(true) {} + virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} + } SD; + RequireCompleteType(D.getLocStart(), qt, SD); if (ContainsLongVector(qt)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; diff --git a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl index 40e0452719..05ec268a0c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl @@ -60,12 +60,9 @@ void woo() { } template -// expected-note@+1{{zero sized record defined here}} struct ForwardDecl; // expected-note{{template is declared here}} void woot() { - // Forward decl fails because forcing completion to check empty size for node object. 
- // expected-error@+1{{record used in GroupNodeInputRecords may not have zero size}} GroupNodeInputRecords > data; // expected-error{{implicit instantiation of undefined template 'ForwardDecl'}} foo(data); } diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index 250171ad05..a2856f448e 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -1,18 +1,93 @@ -// RUN: %dxc -Tlib_6_3 -Wno-unused-value -verify %s +// RUN: %dxc -Tlib_6_8 -Wno-unused-value -verify %s // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. -// expected-note@+5 {{forward declaration of 'S'}} -// expected-note@+4 {{forward declaration of 'S'}} -// expected-note@+3 {{forward declaration of 'S'}} -// expected-note@+2 {{forward declaration of 'S'}} -// expected-note@+1 {{forward declaration of 'S'}} +// expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} +// expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} +// expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} +// expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} +// expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} +// expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} +// expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} +// expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} struct S; + +// expected-note@+2 {{template is declared here}} +// expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} +template struct T; + ConstantBuffer CB; // expected-error {{variable has incomplete type 'S'}} +ConstantBuffer > TB; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +S s; // expected-error {{variable has incomplete type 'S'}} +T<1> t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +cbuffer BadBuffy { + S cb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> cb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + +tbuffer BadTuffy { + S tb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> tb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + S func( // expected-error {{incomplete result type 'S' in function definition}} S param) // expected-error {{variable has incomplete type 'S'}} { S local; // expected-error {{variable has incomplete type 'S'}} return (S)0; // expected-error {{'S' is an incomplete type}} } + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line S e, // expected-error {{variable has incomplete type 'S'}} + inout PointStream OutputStream0) {} // expected-error {{variable has 
incomplete type 'S'}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout LineStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout TriangleStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +void patch_const(InputPatch inpatch, // expected-error{{variable has incomplete type 'S'}} + OutputPatch outpatch) {} // expected-error{{variable has incomplete type 'S'}} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +// expected-error@+1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{variable has incomplete type 'S'}} + NodeOutput output) // expected-error{{variable has incomplete type 'S'}} +{ + ThreadNodeOutputRecords touts; // expected-error{{variable has incomplete type 'S'}} + GroupNodeOutputRecords gouts; // expected-error{{variable has incomplete type 'S'}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{variable has incomplete type 'S'}} From f7f1e3dd8d8c097eeb74a86ccd348bf3f5a27b82 Mon Sep 17 00:00:00 2001 From: Chris B Date: Fri, 7 Mar 2025 17:27:47 -0600 Subject: [PATCH 15/88] [Metal] Add experimental Metal support (#6805) This adds a new `-metal` flag to DXC which can be used to generate Metal's IR directly from DXC after compilation. There are some limitations in this flag which are worth noting: 1) It does not support library shaders (yet) 2) It does not support disassembly (yet) 3) It is _wildly_ under tested because wtihout (2) we can't do anything to really verify correct output (yay?) 
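For reference, the new lit test exercises the flag end to end roughly like this (smoke.hlsl is the existing test input checked in under DXC/Inputs; the output file name is arbitrary):

    dxc smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal

The output is a Metal library, i.e. LLVM bitcode beginning with the MTLB magic number, which is what the test inspects with `head -c 4`. Pairing `-metal` with `-Fc` is rejected for now because of limitation (2) above.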
--- README.md | 10 +++ cmake/config-ix.cmake | 9 ++ cmake/modules/FindMetalIRConverter.cmake | 16 ++++ include/dxc/Support/HLSLOptions.h | 2 + include/dxc/Support/HLSLOptions.td | 5 ++ lib/DxcSupport/HLSLOptions.cpp | 17 ++++ tools/clang/test/DXC/metal.test | 7 ++ tools/clang/test/DXC/no_metal.test | 4 + .../clang/test/DXC/no_metal_disassembly.test | 7 ++ tools/clang/test/lit.cfg | 3 + tools/clang/test/lit.site.cfg.in | 1 + tools/clang/tools/dxcompiler/CMakeLists.txt | 8 ++ .../clang/tools/dxcompiler/dxcompilerobj.cpp | 89 ++++++++++++++++++- 13 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 cmake/modules/FindMetalIRConverter.cmake create mode 100644 tools/clang/test/DXC/metal.test create mode 100644 tools/clang/test/DXC/no_metal.test create mode 100644 tools/clang/test/DXC/no_metal_disassembly.test diff --git a/README.md b/README.md index 35c0132068..ddafde2115 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,16 @@ Development kits containing only the dxc.exe driver app, the dxcompiler.dll, and As an example of community contribution, this project can also target the [SPIR-V](https://www.khronos.org/registry/spir-v/) intermediate representation. Please see the [doc](docs/SPIR-V.rst) for how HLSL features are mapped to SPIR-V, and the [wiki](https://github.com/microsoft/DirectXShaderCompiler/wiki/SPIR%E2%80%90V-CodeGen) page for how to build, use, and contribute to the SPIR-V CodeGen. +### Metal CodeGen + +When built from source DXC can utilize the [Metal Shader +Converter](https://developer.apple.com/metal/shader-converter/) if it is +available during build and configuration time. This allows DXC to generate Metal +shader libraries directly using the `-metal` flag. + +Note: DXC cannot currently disassemble Metal shaders so the `-Fc` flag cannot be +used in conjunction with the `-Fo` flag. + ## Building Sources See the full documentation for [Building and testing DXC](docs/BuildingAndTestingDXC.rst) for detailed instructions. 
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 01b30568a9..4541d08162 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -568,3 +568,12 @@ else() endif() string(REPLACE " " ";" LLVM_BINDINGS_LIST "${LLVM_BINDINGS}") + +# HLSL Change Begin - Metal IR Converter +find_package(MetalIRConverter) +if (METAL_IRCONVERTER_FOUND) + set(ENABLE_METAL_CODEGEN On) + message(STATUS "Enabling Metal Support") + add_definitions(-DENABLE_METAL_CODEGEN) +endif() +# HLSL Change End - Metal IR Converter diff --git a/cmake/modules/FindMetalIRConverter.cmake b/cmake/modules/FindMetalIRConverter.cmake new file mode 100644 index 0000000000..fc7df1d6cc --- /dev/null +++ b/cmake/modules/FindMetalIRConverter.cmake @@ -0,0 +1,16 @@ +find_path(METAL_IRCONVERTER_INCLUDE_DIR metal_irconverter.h + HINTS /usr/local/include/metal_irconverter + DOC "Path to metal IR converter headers" + ) + +find_library(METAL_IRCONVERTER_LIB NAMES metalirconverter + PATH_SUFFIXES lib + ) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(METAL_IRCONVERTER + REQUIRED_VARS METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) + +message(STATUS "Metal IR Converter Include Dir: ${METAL_IRCONVERTER_INCLUDE_DIR}") +message(STATUS "Metal IR Converter Library: ${METAL_IRCONVERTER_LIB}") +mark_as_advanced(METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index 887591ae82..56e95a1659 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -274,6 +274,8 @@ class DxcOpts { SpirvOptions; // All SPIR-V CodeGen-related options #endif // SPIRV Change Ends + + bool GenMetal = false; // OPT_metal }; /// Use this class to capture, convert and handle the lifetime for the diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 130e19a525..ea000f4877 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -346,6 +346,11 @@ def disable_exception_handling : Flag<["-", "/"], "disable-exception-handling">, def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Return a module interface instead of serialized output">; +def metal : Flag<["-"], "metal">, + Group, + Flags<[CoreOption, DriverOption]>, + HelpText<"Generate Metal code">; + // SPIRV Change Starts def spirv : Flag<["-"], "spirv">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Generate SPIR-V code">; diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 3daf880f6d..1ce7d0dfc0 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -1089,6 +1089,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, addDiagnosticArgs(Args, OPT_W_Group, OPT_W_value_Group, opts.Warnings); + opts.GenMetal = Args.hasFlag(OPT_metal, OPT_INVALID, false); + // SPIRV Change Starts #ifdef ENABLE_SPIRV_CODEGEN opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false); @@ -1313,6 +1315,21 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, #endif // ENABLE_SPIRV_CODEGEN // SPIRV Change Ends +#ifndef ENABLE_METAL_CODEGEN + if (opts.GenMetal) { + errors << "Metal CodeGen not available. 
" + "Please rebuild with Metal IR Converter installed."; + return 1; + } +#endif + + if (opts.GenMetal) { + if (!opts.AssemblyCode.empty() || opts.OutputObject.empty()) { + errors << "Disassembly of Metal IR not supported (yet)."; + return 1; + } + } + // Validation for DebugInfo here because spirv uses same DebugInfo opt, // and legacy wrappers will add EmbedDebug in this case, leading to this // failing if placed before spirv path sets DebugInfo to true. diff --git a/tools/clang/test/DXC/metal.test b/tools/clang/test/DXC/metal.test new file mode 100644 index 0000000000..3d00850abc --- /dev/null +++ b/tools/clang/test/DXC/metal.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// Metal libraries are LLVM bitcode. This check inspects the magic number from +// the metal library output. +// RUN: %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal +// RUN: head -c 4 Tmp.metal | FileCheck -check-prefix=MTL %s +// MTL: {{^MTLB}} diff --git a/tools/clang/test/DXC/no_metal.test b/tools/clang/test/DXC/no_metal.test new file mode 100644 index 0000000000..37af16cad5 --- /dev/null +++ b/tools/clang/test/DXC/no_metal.test @@ -0,0 +1,4 @@ +// UNSUPPORTED: metal + +// RUN:not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK:Metal CodeGen not available diff --git a/tools/clang/test/DXC/no_metal_disassembly.test b/tools/clang/test/DXC/no_metal_disassembly.test new file mode 100644 index 0000000000..44283a8fe8 --- /dev/null +++ b/tools/clang/test/DXC/no_metal_disassembly.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// These cases both fail because the shader converter library cannot emit +// textual IR. +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal -Fc Tmp.air 2>&1 | FileCheck %s +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK: Disassembly of Metal IR not supported (yet). diff --git a/tools/clang/test/lit.cfg b/tools/clang/test/lit.cfg index 5fc5d4a27c..a3a352071c 100644 --- a/tools/clang/test/lit.cfg +++ b/tools/clang/test/lit.cfg @@ -504,6 +504,9 @@ if config.enable_backtrace == "1": if config.spirv: config.available_features.add("spirv") +if config.metal: + config.available_features.add("metal") + # Check supported dxil version def get_dxil_version(): result = subprocess.run([lit.util.which('dxc', llvm_tools_dir), "--version"], stdout=subprocess.PIPE) diff --git a/tools/clang/test/lit.site.cfg.in b/tools/clang/test/lit.site.cfg.in index 207450add5..80dcadf288 100644 --- a/tools/clang/test/lit.site.cfg.in +++ b/tools/clang/test/lit.site.cfg.in @@ -22,6 +22,7 @@ config.enable_backtrace = "@ENABLE_BACKTRACES@" config.host_arch = "@HOST_ARCH@" config.spirv = "@ENABLE_SPIRV_CODEGEN@" =="ON" config.hlsl_headers_dir = "@HLSL_HEADERS_DIR@" # HLSL change +config.metal = "@ENABLE_METAL_CODEGEN@".upper() == "ON" # HLSL change # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. 
diff --git a/tools/clang/tools/dxcompiler/CMakeLists.txt b/tools/clang/tools/dxcompiler/CMakeLists.txt index 004d2e5ad1..c69e276194 100644 --- a/tools/clang/tools/dxcompiler/CMakeLists.txt +++ b/tools/clang/tools/dxcompiler/CMakeLists.txt @@ -136,6 +136,14 @@ target_link_libraries(dxcompiler PRIVATE ${LIBRARIES}) if (ENABLE_SPIRV_CODEGEN) target_link_libraries(dxcompiler PRIVATE clangSPIRV) endif (ENABLE_SPIRV_CODEGEN) +if (ENABLE_METAL_CODEGEN) + target_link_libraries(dxcompiler PRIVATE ${METAL_IRCONVERTER_LIB}) + target_include_directories(dxcompiler PRIVATE ${METAL_IRCONVERTER_INCLUDE_DIR}) + + get_filename_component(METAL_IRCONVERTER_LIB_DIR ${METAL_IRCONVERTER_LIB} DIRECTORY CACHE) + set_property(TARGET dxcompiler APPEND_STRING + PROPERTY LINK_FLAGS " -Wl,-rpath,${METAL_IRCONVERTER_LIB_DIR}") +endif (ENABLE_METAL_CODEGEN) include_directories(AFTER ${LLVM_INCLUDE_DIR}/dxc/Tracing ${DIASDK_INCLUDE_DIRS} ${HLSL_VERSION_LOCATION}) include_directories(${LLVM_SOURCE_DIR}/tools/clang/tools/dxcvalidator) diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index c1c844d4be..a8f804bdca 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -71,6 +71,10 @@ #include "clang/Basic/Version.h" #endif // SUPPORT_QUERY_GIT_COMMIT_INFO +#ifdef ENABLE_METAL_CODEGEN +#include "metal_irconverter.h" +#endif + #define CP_UTF16 1200 using namespace llvm; @@ -817,6 +821,10 @@ class DxcCompiler : public IDxcCompiler3, } compiler.getLangOpts().IsHLSLLibrary = opts.IsLibraryProfile(); + if (compiler.getLangOpts().IsHLSLLibrary && opts.GenMetal) + return ErrorWithString("Shader libraries unsupported in Metal (yet)", + riid, ppResult); + // Clear entry function if library target if (compiler.getLangOpts().IsHLSLLibrary) compiler.getLangOpts().HLSLEntryFunction = @@ -1107,7 +1115,86 @@ class DxcCompiler : public IDxcCompiler3, &pHashBlob)); IFT(pResult->SetOutputObject(DXC_OUT_SHADER_HASH, pHashBlob)); } // SUCCEEDED(valHR) - } // compileOK && !opts.CodeGenHighLevel +#ifdef ENABLE_METAL_CODEGEN + // This is a bit hacky because we don't currently have a good way to + // disassemble AIR. 
+ if (opts.GenMetal && produceFullContainer && + !opts.OutputObject.empty()) { + IRCompiler *MetalCompiler = IRCompilerCreate(); + IRCompilerSetEntryPointName( + MetalCompiler, + compiler.getCodeGenOpts().HLSLEntryFunction.c_str()); + + IRObject *DXILObj = IRObjectCreateFromDXIL( + static_cast(pOutputBlob->GetBufferPointer()), + pOutputBlob->GetBufferSize(), IRBytecodeOwnershipNone); + + // Compile DXIL to Metal IR: + IRError *Error = nullptr; + IRObject *AIR = IRCompilerAllocCompileAndLink(MetalCompiler, NULL, + DXILObj, &Error); + + if (!AIR) { + IRObjectDestroy(DXILObj); + IRCompilerDestroy(MetalCompiler); + IRErrorDestroy(Error); + return ErrorWithString( + "Error occurred in Metal Shader Conversion", riid, ppResult); + } + + IRMetalLibBinary *MetalLib = IRMetalLibBinaryCreate(); + IRShaderStage Stage = IRShaderStageInvalid; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile); + switch (SM->GetKind()) { + case DXIL::ShaderKind::Vertex: + Stage = IRShaderStageVertex; + break; + case DXIL::ShaderKind::Pixel: + Stage = IRShaderStageFragment; + break; + case DXIL::ShaderKind::Hull: + Stage = IRShaderStageHull; + break; + case DXIL::ShaderKind::Domain: + Stage = IRShaderStageDomain; + break; + case DXIL::ShaderKind::Mesh: + Stage = IRShaderStageMesh; + break; + case DXIL::ShaderKind::Amplification: + Stage = IRShaderStageAmplification; + break; + case DXIL::ShaderKind::Geometry: + Stage = IRShaderStageGeometry; + break; + case DXIL::ShaderKind::Compute: + Stage = IRShaderStageCompute; + break; + } + assert(Stage != IRShaderStageInvalid && + "Library targets not supported for Metal (yet)."); + IRObjectGetMetalLibBinary(AIR, Stage, MetalLib); + size_t MetalLibSize = IRMetalLibGetBytecodeSize(MetalLib); + std::unique_ptr MetalLibBytes = + std::unique_ptr(new uint8_t[MetalLibSize]); + IRMetalLibGetBytecode(MetalLib, MetalLibBytes.get()); + + // Store the metallib to custom format or disk, or use to create a + // MTLLibrary. + + CComPtr MetalBlob; + IFT(hlsl::DxcCreateBlobOnHeapCopy( + MetalLibBytes.get(), (uint32_t)MetalLibSize, &MetalBlob)); + std::swap(pOutputBlob, MetalBlob); + + IRMetalLibBinaryDestroy(MetalLib); + IRObjectDestroy(DXILObj); + IRObjectDestroy(AIR); + IRCompilerDestroy(MetalCompiler); + } +#endif + } // compileOK && !opts.CodeGenHighLevel } std::string remarks; From 4d3a2f5489fd9f438f13b2308e767a93882d4728 Mon Sep 17 00:00:00 2001 From: Chris B Date: Fri, 7 Mar 2025 17:28:14 -0600 Subject: [PATCH 16/88] [NFC] Improve time tracing data (#7146) This is a bunch of small changes to improve the quality of the time traces. This mostly adds new timers breakign down dxcompilerobj and the always inliner code. 
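All of the new timers use the existing llvm::TimeTraceScope RAII helper from llvm/Support/TimeProfiler.h. A minimal sketch of the two forms this change adds follows; the function and variable names are placeholders rather than real call sites, while the labels mirror ones used in the diff:

    #include "llvm/Support/TimeProfiler.h"

    void runStep(llvm::Function *F) {
      // Fixed label with no per-call detail string.
      llvm::TimeTraceScope WholeStep("Compile Action", llvm::StringRef(""));
      // Label plus a detail string that is only computed when time tracing is active.
      llvm::TimeTraceScope PerFunction("fixupLineNumbers",
                                       [&] { return F->getName().str(); });
    }

Both scopes close when the enclosing function returns, so nested scopes show up as nested spans in the resulting trace.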
--- lib/DxilContainer/DxilContainerAssembler.cpp | 2 ++ lib/Transforms/Utils/CloneFunction.cpp | 6 +++++- lib/Transforms/Utils/InlineFunction.cpp | 16 +++++++++++----- tools/clang/lib/CodeGen/CodeGenModule.cpp | 6 ++++++ tools/clang/lib/Parse/ParseAST.cpp | 16 ++++++++++------ tools/clang/tools/dxcompiler/dxcompilerobj.cpp | 5 +++++ 6 files changed, 39 insertions(+), 12 deletions(-) diff --git a/lib/DxilContainer/DxilContainerAssembler.cpp b/lib/DxilContainer/DxilContainerAssembler.cpp index 0b7f5dd467..f0d7bf6d23 100644 --- a/lib/DxilContainer/DxilContainerAssembler.cpp +++ b/lib/DxilContainer/DxilContainerAssembler.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/Cloning.h" #include #include // Needed for DxilPipelineStateValidation.h @@ -1895,6 +1896,7 @@ void hlsl::SerializeDxilContainerForModule( DxilShaderHash *pShaderHashOut, AbstractMemoryStream *pReflectionStreamOut, AbstractMemoryStream *pRootSigStreamOut, void *pPrivateData, size_t PrivateDataSize) { + llvm::TimeTraceScope TimeScope("SerializeDxilContainer", StringRef("")); // TODO: add a flag to update the module and remove information that is not // part of DXIL proper and is used only to assemble the container. diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index f0d2dbcd7a..46294b3db8 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -29,7 +28,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include @@ -473,6 +474,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, CloningDirector *Director) { + TimeTraceScope TimeScope("CloneAndPruneIntoFromInst", [&] { + return (Twine(OldFunc->getName()) + "->" + NewFunc->getName()).str(); + }); assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index f6a255a0e4..bfa4b61fbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,10 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" @@ -24,13 +23,13 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include 
"llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -38,8 +37,10 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -291,6 +292,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { + TimeTraceScope TimeScope("CloneAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); const Function *CalledFunc = CS.getCalledFunction(); SetVector MD; @@ -401,6 +404,8 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, const DataLayout &DL, AliasAnalysis *AA) { + TimeTraceScope TimeScope("AddAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); if (!EnableNoAliasConversion) return; @@ -872,6 +877,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { + TimeTraceScope TimeScope("fixupLineNumbers", [&] { return Fn->getName(); }); DebugLoc TheCallDL = TheCall->getDebugLoc(); #if 0 // HLSL Change if (!TheCallDL) diff --git a/tools/clang/lib/CodeGen/CodeGenModule.cpp b/tools/clang/lib/CodeGen/CodeGenModule.cpp index 73ad296d47..b274ea9d64 100644 --- a/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -3376,6 +3376,12 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { + llvm::TimeTraceScope TimeScope("CGM::EmitTopLevelDecl", [&] { + if (const auto *ND = dyn_cast(D)) + return ND->getName(); + return StringRef("Unnamed decl"); + }); + // Ignore dependent declarations. if (D->getDeclContext() && D->getDeclContext()->isDependentContext()) return; diff --git a/tools/clang/lib/Parse/ParseAST.cpp b/tools/clang/lib/Parse/ParseAST.cpp index e06a4ee09e..c8009b9b53 100644 --- a/tools/clang/lib/Parse/ParseAST.cpp +++ b/tools/clang/lib/Parse/ParseAST.cpp @@ -100,8 +100,6 @@ void clang::ParseAST(Preprocessor &PP, ASTConsumer *Consumer, void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { - // HLSL Change - Support hierarchial time tracing. - llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); // Collect global stats on Decls/Stmts (until we have a module streamer). if (PrintStats) { Decl::EnableStatistics(); @@ -137,6 +135,8 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { External->StartTranslationUnit(Consumer); if (!S.getDiagnostics().hasUnrecoverableErrorOccurred()) { // HLSL Change: Skip if fatal error already occurred + // HLSL Change - Support hierarchial time tracing. 
+ llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); if (P.ParseTopLevelDecl(ADecl)) { if (!External && !S.getLangOpts().CPlusPlus) P.Diag(diag::ext_empty_translation_unit); @@ -151,10 +151,14 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { } } // HLSL Change: Skip if fatal error already occurred - // Process any TopLevelDecls generated by #pragma weak. - for (Decl *D : S.WeakTopLevelDecls()) - Consumer->HandleTopLevelDecl(DeclGroupRef(D)); - + { + // HLSL Change - Support hierarchial time tracing. + llvm::TimeTraceScope TimeScope("Frontend - Consumer", StringRef("")); + // Process any TopLevelDecls generated by #pragma weak. + for (Decl *D : S.WeakTopLevelDecls()) + Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + } + // HLSL Change Starts // Provide the opportunity to generate translation-unit level validation // errors in the front-end, without relying on code generation being diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index a8f804bdca..ab66838b66 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -722,6 +722,7 @@ class DxcCompiler : public IDxcCompiler3, bool validateRootSigContainer = false; if (isPreprocessing) { + TimeTraceScope TimeScope("PreprocessAction", StringRef("")); // These settings are back-compatible with fxc. clang::PreprocessorOutputOptions &PPOutOpts = compiler.getPreprocessorOutputOpts(); @@ -867,6 +868,7 @@ class DxcCompiler : public IDxcCompiler3, compiler.getTarget().adjust(compiler.getLangOpts()); if (opts.AstDump) { + TimeTraceScope TimeScope("DumpAST", StringRef("")); clang::ASTDumpAction dumpAction; // Consider - ASTDumpFilter, ASTDumpLookups compiler.getFrontendOpts().ASTDumpDecls = true; @@ -876,6 +878,7 @@ class DxcCompiler : public IDxcCompiler3, dumpAction.EndSourceFile(); outStream.flush(); } else if (opts.DumpDependencies) { + TimeTraceScope TimeScope("DumpDependencies", StringRef("")); auto dependencyCollector = std::make_shared(); compiler.addDependencyCollector(dependencyCollector); compiler.createPreprocessor(clang::TranslationUnitKind::TU_Complete); @@ -978,6 +981,7 @@ class DxcCompiler : public IDxcCompiler3, EmitBCAction action(&llvmContext); FrontendInputFile file(pUtf8SourceName, IK_HLSL); bool compileOK; + TimeTraceScope TimeScope("Compile Action", StringRef("")); if (action.BeginSourceFile(compiler, file)) { action.Execute(); action.EndSourceFile(); @@ -1032,6 +1036,7 @@ class DxcCompiler : public IDxcCompiler3, // Do not create a container when there is only a a high-level // representation in the module. if (compileOK && !opts.CodeGenHighLevel) { + TimeTraceScope TimeScope("AssembleAndWriteContainer", StringRef("")); HRESULT valHR = S_OK; CComPtr pRootSigStream; IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), From 50d1af5b645651b7ee4d4ef063bdc88c7d6790d4 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 04:10:08 -0600 Subject: [PATCH 17/88] Respond to feedback remove some stale elements. Add some HLSL type helper functions and add some new ones. Make resource type retreiveals type-safe. Add some parameter comments and names to make clearer what the effect of them are. Pass resource attribute to cbuffer/tbuffer creation. Clean up and clarify error messages. Remove redundant type canonization from type queries. Correct resclass of tbuffers. 
Use multimatch utility of verify to condense checks --- include/dxc/DXIL/DxilConstants.h | 5 +++ lib/DXIL/DxilUtil.cpp | 40 +++++++++---------- tools/clang/include/clang/AST/HlslTypes.h | 2 +- tools/clang/include/clang/Basic/Attr.td | 17 +++++++- .../clang/Basic/DiagnosticSemaKinds.td | 6 +-- tools/clang/lib/AST/ASTContextHLSL.cpp | 15 ++++--- tools/clang/lib/AST/HlslTypes.cpp | 22 ++++------ tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 34 +++++++--------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 2 - .../hlsl/types/invalid-longvec-decls.hlsl | 28 ++++++------- .../clang/test/SemaHLSL/incomplete-type.hlsl | 16 ++------ 12 files changed, 90 insertions(+), 99 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index ac894df1d6..b3c510a038 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -465,6 +465,11 @@ inline bool IsTBuffer(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::TBuffer; } +inline bool IsCTBuffer(DXIL::ResourceKind ResourceKind) { + return ResourceKind == DXIL::ResourceKind::CBuffer || + ResourceKind == DXIL::ResourceKind::TBuffer; +} + /// Whether the resource kind is a FeedbackTexture. inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::FeedbackTexture2D || diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 757a0bc3ee..065f19d7d0 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -427,34 +427,34 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name == "SamplerComparisonState") return RetType( - true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, false, - false, /*cmp or counter*/ true)); + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, + /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("AppendStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + /*UAV*/ false, /*ROV*/ false, /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::RTAccelerationStructure, - false, false, false)); + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("ConstantBuffer<")) return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, - false, false, false)); + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("TextureBuffer<")) return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, - false, false, false)); + /*UAV*/ false, /*ROV*/ false, false)); if (ConsumePrefix(name, "FeedbackTexture2D")) { hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid; @@ -464,7 +464,7 @@ GetHLSLResourceProperties(llvm::Type *Ty) { kind = hlsl::DXIL::ResourceKind::FeedbackTexture2D; if (name.startswith("<")) - return RetType(true, MakeResourceProperties(kind, false, false, false)); + return RetType(true, MakeResourceProperties(kind, /*UAV*/ false, /*ROV*/ false, /*Cmp*/ false)); return FalseRet; } @@ -475,63 +475,63 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name == "ByteAddressBuffer") return RetType(true, 
MakeResourceProperties(hlsl::DXIL::ResourceKind::RawBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Buffer<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TypedBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("StructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (ConsumePrefix(name, "Texture")) { if (name.startswith("1D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture1D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("1DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture1DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("2D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("3D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture3D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Cube<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TextureCube, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("CubeArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::TextureCubeArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMS<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2DMS, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMSArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DMSArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); return FalseRet; } } diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 5cd14cbe8a..9fd09b6539 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -388,7 +388,7 @@ clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( llvm::StringRef typeName, llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext &context, - bool bTBuf); + clang::InheritableAttr *Attr); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 29430e6d4c..bbc1263e20 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -960,16 +960,29 @@ def HLSLTessPatch : InheritableAttr { def HLSLStreamOutput : InheritableAttr { let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"Vertices">]; + // PrimVertices are the number of vertices that make up the streamed primitive. + // Points have 1. Lines have 2. Triangles have 3. + let Args = [UnsignedArgument<"PrimVertices">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; } def HLSLResource : InheritableAttr { let Spellings = []; // No spellings! 
- let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; + let Args = [UnsignedArgument<"ResKindUint">, UnsignedArgument<"ResClassUint">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; + + // Add enum typed getters for safety and brevity. + let AdditionalMembers = + [{ + hlsl::DXIL::ResourceKind getResKind() const { + return (hlsl::DXIL::ResourceKind)getResKindUint(); + } + hlsl::DXIL::ResourceClass getResClass() const { + return (hlsl::DXIL::ResourceClass)getResClassUint(); + } + }]; } def HLSLNodeLaunch : InheritableAttr { diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 4d81b25ccc..5f6b7effce 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7519,7 +7519,7 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_plus: Error< +def err_hlsl_invalid_range_1_to_max: Error< "invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; @@ -7852,9 +7852,7 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector: Error< - "Vectors of over %0 elements in %1 are not supported">; -def err_hlsl_vector_too_long: Error< - "Vectors of over %0 elements in are not supported">; + "Vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 9bacfc8b42..0a64772d11 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1131,25 +1131,24 @@ CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( } clang::CXXRecordDecl * -hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { +hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, InheritableAttr *Attr) { // Create ConstantBufferView template declaration in translation unit scope // like other resource. // template ConstantBuffer { int h; } DeclContext *DC = context.getTranslationUnitDecl(); + DXASSERT(Attr, "Constbuffer types require an attribute"); - BuiltinTypeDeclBuilder typeDeclBuilder( - DC, bTBuf ? "TextureBuffer" : "ConstantBuffer", - TagDecl::TagKind::TTK_Struct); + const char *TypeName = "ConstantBuffer"; + if (IsTBuffer(cast(Attr)->getResKind())) + TypeName = "TextureBuffer"; + BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, TagDecl::TagKind::TTK_Struct); (void)typeDeclBuilder.addTypeTemplateParam("T"); typeDeclBuilder.startDefinition(); CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
- - typeDeclBuilder.getRecordDecl()->addAttr(HLSLResourceAttr::CreateImplicit( - context, (unsigned)DXIL::ResourceKind::CBuffer, - (unsigned)DXIL::ResourceClass::CBuffer)); + typeDeclBuilder.getRecordDecl()->addAttr(Attr); typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index e9c443b9d7..41175e3d37 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -477,37 +477,32 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { - type = type.getCanonicalType(); if (const HLSLTessPatchAttr *Attr = getAttr(type)) return Attr->getIsInput(); return false; } bool IsHLSLOutputPatchType(QualType type) { - type = type.getCanonicalType(); if (const HLSLTessPatchAttr *Attr = getAttr(type)) return !Attr->getIsInput(); return false; } bool IsHLSLPointStreamType(QualType type) { - type = type.getCanonicalType(); if (const HLSLStreamOutputAttr *Attr = getAttr(type)) - return Attr->getVertices() == 1; + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Point; return false; } bool IsHLSLLineStreamType(QualType type) { - type = type.getCanonicalType(); if (const HLSLStreamOutputAttr *Attr = getAttr(type)) - return Attr->getVertices() == 2; + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Line; return false; } bool IsHLSLTriangleStreamType(QualType type) { - type = type.getCanonicalType(); if (const HLSLStreamOutputAttr *Attr = getAttr(type)) - return Attr->getVertices() == 3; + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Triangle; return false; } @@ -558,13 +553,13 @@ bool IsHLSLNodeType(clang::QualType type) { bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) - return Attr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer; + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) - return Attr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer; + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } @@ -592,7 +587,7 @@ bool IsHLSLNodeOutputType(clang::QualType type) { bool IsHLSLStructuredBufferType(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) - return Attr->getResKind() == (unsigned)DXIL::ResourceKind::StructuredBuffer; + return Attr->getResKind() == DXIL::ResourceKind::StructuredBuffer; return false; } @@ -799,10 +794,7 @@ QualType GetHLSLResourceResultType(QualType type) { dyn_cast(RD)) { const HLSLResourceAttr *Attr = getAttr(type); - if (Attr && (Attr->getResKind() == - (unsigned)DXIL::ResourceKind::FeedbackTexture2D || - Attr->getResKind() == - (unsigned)DXIL::ResourceKind::FeedbackTexture2DArray)) { + if (Attr && DXIL::IsFeedbackTexture(Attr->getResKind())) { // Feedback textures are write-only and the data is opaque, // so there is no result type per se. 
return {}; diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index d71dc2be4c..73ea9dd93c 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -812,7 +812,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, if (ContainsLongVector(Payload->getType())) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "payload parameters"; + << "payload parameters"; return; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 8abad632a2..a7d38dc1a6 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -942,11 +942,6 @@ GetOrCreateVectorSpecialization(ASTContext &context, Sema *sema, "otherwise vector handle cannot be looked up"); #endif - // I don't think this is necessary. - CXXRecordDecl *Decl = vectorSpecializationType->getAsCXXRecordDecl(); - if (GetHLSLVecSize(vectorSpecializationType) > DXIL::kDefaultMaxVectorLength) - Decl->setHasHLSLLongVector(); - return vectorSpecializationType; } @@ -3610,9 +3605,9 @@ class HLSLExternalSource : public ExternalSemaSource { break; } } else if (kind == AR_OBJECT_CONSTANT_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ false); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_TEXTURE_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ true); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_RAY_QUERY) { recordDecl = DeclareRayQueryType(*m_context); } else if (kind == AR_OBJECT_HEAP_RESOURCE) { @@ -4760,7 +4755,7 @@ class HLSLExternalSource : public ExternalSemaSource { return true; case AR_OBJECT_TEXTURE_BUFFER: ResKind = DXIL::ResourceKind::TBuffer; - ResClass = DXIL::ResourceClass::CBuffer; + ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_FEEDBACKTEXTURE2D: ResKind = DXIL::ResourceKind::FeedbackTexture2D; @@ -5219,7 +5214,7 @@ class HLSLExternalSource : public ExternalSemaSource { MaxLength = m_sema->getLangOpts().MaxHLSLVectorLength; if (!sintValue.isStrictlyPositive() || sintValue.getLimitedValue() > MaxLength) { - m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_plus) << MaxLength; + m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_to_max) << MaxLength; return true; } @@ -5245,7 +5240,7 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLResourceAttr *ResAttr = Template->getTemplatedDecl()->getAttr(); if (ResAttr && - ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { + DXIL::IsCTBuffer(ResAttr->getResKind())) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); @@ -5265,7 +5260,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsLongVector(argType)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "cbuffers"; + << "ConstantBuffers or TextureBuffers"; return true; } } @@ -5340,7 +5335,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "tessellation patches"; + << "tessellation patches"; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5358,7 +5353,7 @@ class HLSLExternalSource : public ExternalSemaSource { if 
(ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "geometry streams"; + << "geometry streams"; return true; } } @@ -5382,7 +5377,7 @@ class HLSLExternalSource : public ExternalSemaSource { // NOTE: IsValidTemplateArgumentType emits its own diagnostics return true; } - if (ResAttr && IsTyped((DXIL::ResourceKind)ResAttr->getResKind())) { + if (ResAttr && IsTyped(ResAttr->getResKind())) { // Check vectors for being too large. if (IsVectorType(m_sema, argType)) { unsigned NumElt = hlsl::GetElementCount(argType); @@ -11626,7 +11621,7 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, case AR_TOBJ_VECTOR: if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "node records"; + << "node records"; Empty = false; return false; } @@ -14750,7 +14745,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, RequireCompleteType(D.getLocStart(), qt, SD); if (ContainsLongVector(qt)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "cbuffers"; + << "cbuffers or tbuffers"; result = false; } } @@ -15647,7 +15642,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { if (ContainsLongVector(Arg->getType())) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; + << "user-defined struct parameter"; return true; } return false; @@ -16385,15 +16380,16 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. + // See issue #7186. 
for (const auto *param : FD->params()) { if (ContainsLongVector(param->getType())) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "entry function parameters"; + << "entry function parameters"; } if (ContainsLongVector(FD->getReturnType())) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "entry function return type"; + << "entry function return type"; DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 2275c48114..a11f72b306 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -524,13 +524,11 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { if (ContainsLongVector(param->getType())) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; if (ContainsLongVector(pPatchFnDecl->getReturnType())) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "patch constant function return type"; } diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 2d0f800121..142eb59f87 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -27,27 +27,27 @@ struct LongVecTpl { vector vec; }; -vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} cbuffer BadBuffy { - vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; tbuffer BadTuffy { - vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector tb_vec_arr[10]; // expected-error{{Vectors 
of over 4 elements in cbuffers are not supported}} - TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; -ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} [shader("pixel")] vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index a2856f448e..b0d4f1da7f 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -3,19 +3,9 @@ // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. -// expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} -// expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} -// expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} -// expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} -// expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} -// expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} -// expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} -// expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} -struct S; - -// expected-note@+2 {{template is declared here}} -// expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} -template struct T; + +struct S; // expected-note 24 {{forward declaration of 'S'}} +template struct T; // expected-note 4 {{template is declared here}} ConstantBuffer CB; // expected-error {{variable has incomplete type 'S'}} ConstantBuffer > TB; // expected-error {{implicit instantiation of undefined template 'T<1>'}} From eedab25273063edd04740d96174dcb8d799c44d7 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: 
Mon, 10 Mar 2025 04:16:43 -0600 Subject: [PATCH 18/88] clang-format --- lib/DXIL/DxilUtil.cpp | 32 +++++++++++-------- tools/clang/include/clang/AST/HlslTypes.h | 5 +-- tools/clang/include/clang/Basic/Attr.td | 10 +++--- .../clang/Basic/DiagnosticSemaKinds.td | 8 ++--- tools/clang/lib/AST/ASTContextHLSL.cpp | 6 ++-- tools/clang/lib/Sema/SemaHLSL.cpp | 3 +- 6 files changed, 35 insertions(+), 29 deletions(-) diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 065f19d7d0..f6ffd7f7e2 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -426,19 +426,21 @@ GetHLSLResourceProperties(llvm::Type *Ty) { false, false, false)); if (name == "SamplerComparisonState") - return RetType( - true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, - /*ROV*/ false, /*cmp or counter*/ true)); + return RetType(true, MakeResourceProperties( + hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, + /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("AppendStructuredBuffer<")) - return RetType(true, MakeResourceProperties( - hlsl::DXIL::ResourceKind::StructuredBuffer, - /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); + return RetType(true, + MakeResourceProperties( + hlsl::DXIL::ResourceKind::StructuredBuffer, + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - /*UAV*/ false, /*ROV*/ false, /*cmp or counter*/ true)); + /*UAV*/ false, /*ROV*/ false, + /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") return RetType(true, @@ -447,14 +449,14 @@ GetHLSLResourceProperties(llvm::Type *Ty) { /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("ConstantBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, - /*UAV*/ false, /*ROV*/ false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("TextureBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, - /*UAV*/ false, /*ROV*/ false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (ConsumePrefix(name, "FeedbackTexture2D")) { hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid; @@ -464,7 +466,9 @@ GetHLSLResourceProperties(llvm::Type *Ty) { kind = hlsl::DXIL::ResourceKind::FeedbackTexture2D; if (name.startswith("<")) - return RetType(true, MakeResourceProperties(kind, /*UAV*/ false, /*ROV*/ false, /*Cmp*/ false)); + return RetType(true, + MakeResourceProperties(kind, /*UAV*/ false, + /*ROV*/ false, /*Cmp*/ false)); return FalseRet; } diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 9fd09b6539..e6a50de8fb 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -387,8 +387,9 @@ clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); -clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext &context, - clang::InheritableAttr *Attr); +clang::CXXRecordDecl * +DeclareConstantBufferViewType(clang::ASTContext &context, + clang::InheritableAttr *Attr); clang::CXXRecordDecl 
*DeclareRayQueryType(clang::ASTContext &context); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index bbc1263e20..9e48df51fd 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -960,8 +960,8 @@ def HLSLTessPatch : InheritableAttr { def HLSLStreamOutput : InheritableAttr { let Spellings = []; // No spellings! - // PrimVertices are the number of vertices that make up the streamed primitive. - // Points have 1. Lines have 2. Triangles have 3. + // PrimVertices are the number of vertices that make up the streamed + // primitive. Points have 1. Lines have 2. Triangles have 3. let Args = [UnsignedArgument<"PrimVertices">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; @@ -969,13 +969,13 @@ def HLSLStreamOutput : InheritableAttr { def HLSLResource : InheritableAttr { let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"ResKindUint">, UnsignedArgument<"ResClassUint">]; + let Args = [UnsignedArgument<"ResKindUint">, + UnsignedArgument<"ResClassUint">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; // Add enum typed getters for safety and brevity. - let AdditionalMembers = - [{ + let AdditionalMembers = [{ hlsl::DXIL::ResourceKind getResKind() const { return (hlsl::DXIL::ResourceKind)getResKindUint(); } diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5f6b7effce..64eebfeb0e 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7519,8 +7519,8 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_to_max: Error< - "invalid value, valid range is between 1 and %0 inclusive">; +def err_hlsl_invalid_range_1_to_max + : Error<"invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; def err_hlsl_matrix_member_empty: Error< @@ -7851,8 +7851,8 @@ def err_hlsl_load_from_mesh_out_arrays: Error< "output arrays of a mesh shader can not be read from">; def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; -def err_hlsl_unsupported_long_vector: Error< - "Vectors of over 4 elements in %0 are not supported">; +def err_hlsl_unsupported_long_vector + : Error<"Vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 0a64772d11..870d032d39 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1131,7 +1131,8 @@ CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( } clang::CXXRecordDecl * -hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, InheritableAttr *Attr) { +hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, + InheritableAttr *Attr) { // Create ConstantBufferView template declaration in 
translation unit scope // like other resource. // template ConstantBuffer { int h; } @@ -1141,7 +1142,8 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, InheritableAttr const char *TypeName = "ConstantBuffer"; if (IsTBuffer(cast(Attr)->getResKind())) TypeName = "TextureBuffer"; - BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, TagDecl::TagKind::TTK_Struct); + BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, + TagDecl::TagKind::TTK_Struct); (void)typeDeclBuilder.addTypeTemplateParam("T"); typeDeclBuilder.startDefinition(); CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index a7d38dc1a6..2de7004532 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5239,8 +5239,7 @@ class HLSLExternalSource : public ExternalSemaSource { // Allow object type for Constant/TextureBuffer. HLSLResourceAttr *ResAttr = Template->getTemplatedDecl()->getAttr(); - if (ResAttr && - DXIL::IsCTBuffer(ResAttr->getResKind())) { + if (ResAttr && DXIL::IsCTBuffer(ResAttr->getResKind())) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); From e9cf3d2b9693ca997b579c1fc1b5ab5af3df7c29 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 04:49:24 -0600 Subject: [PATCH 19/88] Respond to feedback from a different PR --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- .../hlsl/types/invalid-longvec-decls-hs.hlsl | 6 +- .../hlsl/types/invalid-longvec-decls.hlsl | 98 +++++++++---------- 3 files changed, 53 insertions(+), 53 deletions(-) diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 64eebfeb0e..9be040b8a0 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7852,7 +7852,7 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector - : Error<"Vectors of over 4 elements in %0 are not supported">; + : Error<"vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl index 185233ad0f..1625454360 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl @@ -10,9 +10,9 @@ struct LongVec { vector vec; }; -HsConstantData PatchConstantFunction( // expected-error{{Vectors of over 4 elements in patch constant function return type are not supported}} - vector vec : V, // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} - LongVec lv : L) { // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} +HsConstantData PatchConstantFunction( // expected-error{{vectors of over 4 elements in patch constant function return type are not supported}} + vector vec : V, // expected-error{{vectors of over 4 elements in patch constant function parameters are not 
supported}} + LongVec lv : L) { // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} return (HsConstantData)0; } diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 142eb59f87..0604feeaec 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -27,37 +27,37 @@ struct LongVecTpl { vector vec; }; -vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} -vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} -TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} -TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} cbuffer BadBuffy { - vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; tbuffer BadTuffy { - vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; -ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} -TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not 
supported}} +ConstantBuffer< TYPE > const_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} [shader("pixel")] -vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} - vector vec : V) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +vector main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + vector vec : V) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} return vec; } [shader("vertex")] -TYPE vs_main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} - TYPE parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +TYPE vs_main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + TYPE parm : P) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} parm.f = 0; return parm; } @@ -65,33 +65,33 @@ TYPE vs_main( // expected-error{{Vectors of over 4 elements in entry function re [shader("geometry")] [maxvertexcount(3)] -void gs_point(line TYPE e, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - inout PointStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} +void gs_point(line TYPE e, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout PointStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} [shader("geometry")] [maxvertexcount(12)] -void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - inout LineStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout LineStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} [shader("geometry")] [maxvertexcount(12)] -void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - inout TriangleStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout TriangleStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} [shader("domain")] [domain("tri")] -void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} -void patch_const(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} - OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not 
supported}} +void patch_const(InputPatch inpatch, // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} [shader("hull")] [domain("tri")] [outputtopology("triangle_cw")] [outputcontrolpoints(32)] [patchconstantfunc("patch_const")] -void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void hs_main(InputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} RaytracingAccelerationStructure RTAS; @@ -116,42 +116,42 @@ struct [raypayload] DXRLongVecTpl { void raygen() { RTTYPE p = (RTTYPE)0; RayDesc ray = (RayDesc)0; - TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} - CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("closesthit")] -void closesthit(inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in RTTYPE attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void closesthit(inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; - TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} - CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("anyhit")] -void AnyHit( inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in RTTYPE attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void AnyHit( inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} { } [shader("miss")] -void Miss(inout RTTYPE payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void Miss(inout RTTYPE payload){ // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; - TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} - CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are 
not supported}} + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("intersection")] void Intersection() { float hitT = RayTCurrent(); RTTYPE attr = (RTTYPE)0; - bool bReported = ReportHit(hitT, 0, attr); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("callable")] -void callable1(inout RTTYPE p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +void callable1(inout RTTYPE p) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } groupshared LongVec as_pld; @@ -159,7 +159,7 @@ groupshared LongVec as_pld; [shader("amplification")] [numthreads(1,1,1)] void Amp() { - DispatchMesh(1,1,1,as_pld); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + DispatchMesh(1,1,1,as_pld); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } struct NodeLongVec { @@ -183,18 +183,18 @@ struct NodeLongVecTpl { [NodeLaunch("broadcasting")] [NumThreads(8,1,1)] [NodeMaxDispatchGrid(8,1,1)] -void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are not supported}} - NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{vectors of over 4 elements in node records are not supported}} + NodeOutput output) // expected-error{{vectors of over 4 elements in node records are not supported}} { - ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} - GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + ThreadNodeOutputRecords touts; // expected-error{{vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{vectors of over 4 elements in node records are not supported}} } [Shader("node")] [NodeLaunch("coalescing")] [NumThreads(8,1,1)] -void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void coalesce(GroupNodeInputRecords input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} [Shader("node")] [NodeLaunch("thread")] -void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void threader(ThreadNodeInputRecord input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} From cc0ddc23b5a8b6bc16329871b3c91900676090dd Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 05:04:29 -0600 Subject: [PATCH 20/88] Rename long vector check func again --- tools/clang/include/clang/Sema/SemaHLSL.h | 2 +- 
tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 16 ++++++++-------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 7e7400d390..d6103b55e6 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool ContainsLongVector(clang::QualType qt); +bool containsLongVector(clang::QualType qt); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 73ea9dd93c..c3dfdb7c9f 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (ContainsLongVector(Payload->getType())) { + if (containsLongVector(Payload->getType())) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << "payload parameters"; return; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 2de7004532..dffa680a35 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5257,7 +5257,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (ContainsLongVector(argType)) { + if (containsLongVector(argType)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << "ConstantBuffers or TextureBuffers"; return true; @@ -5331,7 +5331,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { + if (containsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << "tessellation patches"; @@ -5349,7 +5349,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { + if (containsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << "geometry streams"; @@ -12090,7 +12090,7 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::ContainsLongVector(QualType qt) { +bool hlsl::containsLongVector(QualType qt) { if (qt.isNull() || qt->isDependentType()) return false; @@ -14742,7 +14742,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (ContainsLongVector(qt)) { + if (containsLongVector(qt)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << "cbuffers or tbuffers"; result = false; @@ -15639,7 +15639,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsLongVector(Arg->getType())) { + if (containsLongVector(Arg->getType())) { S->Diag(Arg->getExprLoc(), 
diag::err_hlsl_unsupported_long_vector) << "user-defined struct parameter"; return true; @@ -16381,12 +16381,12 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. for (const auto *param : FD->params()) { - if (ContainsLongVector(param->getType())) + if (containsLongVector(param->getType())) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << "entry function parameters"; } - if (ContainsLongVector(FD->getReturnType())) + if (containsLongVector(FD->getReturnType())) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << "entry function return type"; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index a11f72b306..11bb4c4f2f 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,12 +521,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (ContainsLongVector(param->getType())) + if (containsLongVector(param->getType())) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << "patch constant function parameters"; - if (ContainsLongVector(pPatchFnDecl->getReturnType())) + if (containsLongVector(pPatchFnDecl->getReturnType())) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) << "patch constant function return type"; From 66e7d23937c1608576942e73ad4bf97eeb2185fb Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 13:08:58 -0600 Subject: [PATCH 21/88] Respond to feedback Correct UAVness of consume/append buffers. Add HLSL notes for changes to DeclCXX Share more code in IsHLSLVecMatType --- lib/DXIL/DxilUtil.cpp | 2 +- tools/clang/lib/AST/DeclCXX.cpp | 7 ++++++- tools/clang/lib/AST/HlslTypes.cpp | 13 +------------ tools/clang/lib/Sema/SemaHLSL.cpp | 6 ++---- 4 files changed, 10 insertions(+), 18 deletions(-) diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index f6ffd7f7e2..865fad487c 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -439,7 +439,7 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - /*UAV*/ false, /*ROV*/ false, + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index 5f8c186919..baed44667f 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -48,6 +48,7 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const { } CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) + // HLSL Change Begin - Add HasLongVector and clang-format : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), @@ -73,6 +74,7 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} +// HLSL Change End - Add HasLongVector and clang-format CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return 
Bases.get(Definition->getASTContext().getExternalSource()); @@ -201,9 +203,10 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; - // Propagate presence of long vector to child classes. + // HLSL Change Begin - Propagate presence of long vector to child classes. if (BaseClassDecl->hasHLSLLongVector()) data().HasHLSLLongVector = true; + // HLSL Change End // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) @@ -387,8 +390,10 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForDestructor = true; } + // HLSL Change Begin - Propagate presence of long vector to child classes. if (Subobj->hasHLSLLongVector()) data().HasHLSLLongVector = true; + // HLSL Change End } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 41175e3d37..630e969881 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -68,18 +68,7 @@ template static AttrType *getAttr(clang::QualType type) { } bool IsHLSLVecMatType(clang::QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr() || - Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr() || Decl->getAttr(); - } - return false; + return getAttr(type) || getAttr(type); } bool IsHLSLMatType(clang::QualType type) { diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index dffa680a35..858b964cdf 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -4736,16 +4736,14 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::RawBuffer; ResClass = DXIL::ResourceClass::UAV; return true; - case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: - case AR_OBJECT_APPEND_STRUCTURED_BUFFER: - // It may seem incorrect to make these SRV, - // but it is consistent with GetHLSLResourceProperties(). 
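// Illustrative summary, not part of the patch: after this hunk the
// append/consume cases share the UAV group below, so the Sema-side
// classification agrees with the DxilUtil.cpp hunk earlier in this commit
// that passes /*UAV*/ true for ConsumeStructuredBuffer. In effect:
//   AppendStructuredBuffer   -> ResourceKind::StructuredBuffer, ResourceClass::UAV
//   ConsumeStructuredBuffer  -> ResourceKind::StructuredBuffer, ResourceClass::UAV
//   (plain) StructuredBuffer -> ResourceKind::StructuredBuffer, ResourceClass::SRV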
case AR_OBJECT_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_RWSTRUCTURED_BUFFER: case AR_OBJECT_ROVSTRUCTURED_BUFFER: + case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: + case AR_OBJECT_APPEND_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::UAV; return true; From 0102b3c4592682deacbae140af462f512ce325d1 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 14:32:02 -0600 Subject: [PATCH 22/88] Use select indices instead of strings as parameters to longvec error --- .../clang/Basic/DiagnosticSemaKinds.td | 8 ++++- tools/clang/lib/Sema/SemaDXR.cpp | 3 +- tools/clang/lib/Sema/SemaHLSL.cpp | 30 ++++++++++++------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 12 +++++--- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9be040b8a0..de59f01c5d 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7852,7 +7852,13 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector - : Error<"vectors of over 4 elements in %0 are not supported">; + : Error<"vectors of over 4 elements in %select{ + ConstantBuffers or TextureBuffers| + tessellation patches|geometry streams|node records| + cbuffers or tbuffers|user-defined struct parameter| + entry function parameters|entry function return type| + patch constant function parameters|patch constant function return type| + payload parameters}0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index c3dfdb7c9f..0f27de8291 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -811,8 +811,9 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, } if (containsLongVector(Payload->getType())) { + const unsigned PayloadParametersIdx = 10; S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "payload parameters"; + << PayloadParametersIdx; return; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 858b964cdf..2d1873fd55 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5256,8 +5256,9 @@ class HLSLExternalSource : public ExternalSemaSource { diag::err_typecheck_decl_incomplete_type); if (containsLongVector(argType)) { + const unsigned ConstantBuffersOrTextureBuffersIdx = 0; m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << "ConstantBuffers or TextureBuffers"; + << ConstantBuffersOrTextureBuffersIdx; return true; } } @@ -5330,9 +5331,10 @@ class HLSLExternalSource : public ExternalSemaSource { if (Decl && !Decl->isCompleteDefinition()) return true; if (containsLongVector(arg.getAsType())) { + const unsigned TessellationPatchesIDx = 1; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "tessellation patches"; + << TessellationPatchesIDx; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5348,9 +5350,10 @@ class HLSLExternalSource : 
public ExternalSemaSource { if (Decl && !Decl->isCompleteDefinition()) return true; if (containsLongVector(arg.getAsType())) { + const unsigned GeometryStreamsIdx = 2; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "geometry streams"; + << GeometryStreamsIdx; return true; } } @@ -11617,8 +11620,9 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, switch (shapeKind) { case AR_TOBJ_VECTOR: if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { + const unsigned NodeRecordsIdx = 3; self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "node records"; + << NodeRecordsIdx; Empty = false; return false; } @@ -14741,8 +14745,9 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } SD; RequireCompleteType(D.getLocStart(), qt, SD); if (containsLongVector(qt)) { + unsigned CbuffersOrTbuffersIdx = 4; Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << "cbuffers or tbuffers"; + << CbuffersOrTbuffersIdx; result = false; } } @@ -15638,8 +15643,9 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { if (containsLongVector(Arg->getType())) { + const unsigned UserDefinedStructParameterIdx = 5; S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << "user-defined struct parameter"; + << UserDefinedStructParameterIdx; return true; } return false; @@ -16379,14 +16385,18 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. for (const auto *param : FD->params()) { - if (containsLongVector(param->getType())) + if (containsLongVector(param->getType())) { + const unsigned EntryFunctionParametersIdx = 6; S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function parameters"; + << EntryFunctionParametersIdx; + } } - if (containsLongVector(FD->getReturnType())) + if (containsLongVector(FD->getReturnType())) { + const unsigned EntryFunctionReturnIdx = 7; S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function return type"; + << EntryFunctionReturnIdx; + } DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 11bb4c4f2f..feefd4f625 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,15 +521,19 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (containsLongVector(param->getType())) + if (containsLongVector(param->getType())) { + const unsigned PatchConstantFunctionParametersIdx = 8; self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "patch constant function parameters"; + << PatchConstantFunctionParametersIdx; + } - if (containsLongVector(pPatchFnDecl->getReturnType())) + if (containsLongVector(pPatchFnDecl->getReturnType())) { + const unsigned PatchConstantFunctionReturnIdx = 9; self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "patch constant function return type"; + << PatchConstantFunctionReturnIdx; + } } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); From 88479cfeb11657031606f6d8a3bcb1076d5f6746 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 
15:14:08 -0600 Subject: [PATCH 23/88] fix formatting induced build break --- .../include/clang/Basic/DiagnosticSemaKinds.td | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index de59f01c5d..8d428073bd 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7852,13 +7852,13 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector - : Error<"vectors of over 4 elements in %select{ - ConstantBuffers or TextureBuffers| - tessellation patches|geometry streams|node records| - cbuffers or tbuffers|user-defined struct parameter| - entry function parameters|entry function return type| - patch constant function parameters|patch constant function return type| - payload parameters}0 are not supported">; + : Error<"vectors of over 4 elements in " + "%select{ConstantBuffers or TextureBuffers|" + "tessellation patches|geometry streams|node records|" + "cbuffers or tbuffers|user-defined struct parameter|" + "entry function parameters|entry function return type|" + "patch constant function parameters|patch constant function return type|" + "payload parameters}0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< From a2979e7d014a4a7a55a9d1d1ebc6e9697dce4ed9 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 11 Mar 2025 10:51:22 -0700 Subject: [PATCH 24/88] Resolve some default error warnings (#7191) DxilContainerValidation.cpp has some int / bool comparisons that cause default error warnings in some of the private builds. This needs to be addressed. This PR changes the comparisons by converting the numerical expressions into the appropriate boolean, then comparing the booleans. --- lib/DxilValidation/DxilContainerValidation.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/DxilValidation/DxilContainerValidation.cpp b/lib/DxilValidation/DxilContainerValidation.cpp index 890e90e354..c21e588cf5 100644 --- a/lib/DxilValidation/DxilContainerValidation.cpp +++ b/lib/DxilValidation/DxilContainerValidation.cpp @@ -337,9 +337,12 @@ void PSVContentVerifier::VerifySignatureElement( PSVSignatureElement PSVSE(StrTab, IndexTab, PSVSE0); if (SE.IsArbitrary()) - Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()); + Mismatch |= + strcmp(PSVSE.GetSemanticName(), SE.GetName()) == 0 ? false : true; else - Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()); + Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()) == 0 + ? false + : true; ModulePSVSE0.SemanticName = PSVSE0->SemanticName; // Compare all fields. @@ -494,7 +497,8 @@ void PSVContentVerifier::Verify(unsigned ValMajor, unsigned ValMinor, std::to_string(ShaderStage)); return; } - if (PSV1->UsesViewID != DM.m_ShaderFlags.GetViewID()) + bool ViewIDUsed = PSV1->UsesViewID == 0 ? 
false : true; + if (ViewIDUsed != DM.m_ShaderFlags.GetViewID()) EmitMismatchError("UsesViewID", std::to_string(PSV1->UsesViewID), std::to_string(DM.m_ShaderFlags.GetViewID())); From 3d6917137ec429d3050f6a6256afdd3d9e1b3e20 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 12 Mar 2025 16:13:11 -0700 Subject: [PATCH 25/88] Shorten bool conversion, remove unneeded change (#7197) Polishing up changes made to improve dxc buildability in different environments. --- lib/DxilValidation/DxilContainerValidation.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/DxilValidation/DxilContainerValidation.cpp b/lib/DxilValidation/DxilContainerValidation.cpp index c21e588cf5..89e23767fe 100644 --- a/lib/DxilValidation/DxilContainerValidation.cpp +++ b/lib/DxilValidation/DxilContainerValidation.cpp @@ -337,12 +337,9 @@ void PSVContentVerifier::VerifySignatureElement( PSVSignatureElement PSVSE(StrTab, IndexTab, PSVSE0); if (SE.IsArbitrary()) - Mismatch |= - strcmp(PSVSE.GetSemanticName(), SE.GetName()) == 0 ? false : true; + Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()) != 0; else - Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()) == 0 - ? false - : true; + Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()); ModulePSVSE0.SemanticName = PSVSE0->SemanticName; // Compare all fields. @@ -497,7 +494,7 @@ void PSVContentVerifier::Verify(unsigned ValMajor, unsigned ValMinor, std::to_string(ShaderStage)); return; } - bool ViewIDUsed = PSV1->UsesViewID == 0 ? false : true; + bool ViewIDUsed = PSV1->UsesViewID != 0; if (ViewIDUsed != DM.m_ShaderFlags.GetViewID()) EmitMismatchError("UsesViewID", std::to_string(PSV1->UsesViewID), std::to_string(DM.m_ShaderFlags.GetViewID())); From ec5324d66215cd748c162e0e5efed9a85b402ff9 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 13 Mar 2025 12:29:13 -0700 Subject: [PATCH 26/88] NFC: Update HLSL_INTRINSIC struct for Flags and MinShaderModel fields (#7199) HLSL_INTRINSIC will need to be updated for SM 6.9, specifically: - to add a new flag - to encode minimum shader model version for an availability attribute Changing this structure is a breaking change to the internal intrinsic table protocol, which is used for the extension mechanism. This change separates out the breaking change with no functional changes for simpler review and testing. For the new flag, this change switches to using a UINT Flags field to make flags extensible without breaking the table format. For the version, a UINT MinShaderModel will be the encoded version format used elsewhere: (Major << 4) | (Minor & 0xF) Commented code for using the MinShaderModel is provided for when a subsequent change will implement the availability attribute checks. --- include/dxc/dxcapi.internal.h | 11 ++- tools/clang/lib/Sema/SemaHLSL.cpp | 15 +++- tools/clang/unittests/HLSL/ExtensionTest.cpp | 77 +++++++++++--------- utils/hct/hctdb.py | 29 ++++++++ utils/hct/hctdb_instrhelp.py | 18 ++++- 5 files changed, 106 insertions(+), 44 deletions(-) diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index b0f9a467a4..4b8e237201 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -160,11 +160,16 @@ struct HLSL_INTRINSIC_ARGUMENT { // matching input constraints. 
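// Quick worked example of the encoding described in this commit message (not
// part of the patch): MinShaderModel packs the version as
// (Major << 4) | (Minor & 0xF), so shader model 6.9 is (6 << 4) | 9 == 0x69,
// 6.0 is 0x60, and 0 still means "no minimum". Decoding reverses it:
// Major = Encoded >> 4, Minor = Encoded & 0xF, which is what the commented-out
// availability sketch added to SemaHLSL.cpp below does.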
}; +// HLSL_INTRINSIC flags +static const UINT INTRIN_FLAG_READ_ONLY = 1U << 0; +static const UINT INTRIN_FLAG_READ_NONE = 1U << 1; +static const UINT INTRIN_FLAG_IS_WAVE = 1U << 2; + struct HLSL_INTRINSIC { UINT Op; // Intrinsic Op ID - BOOL bReadOnly; // Only read memory - BOOL bReadNone; // Not read memory - BOOL bIsWave; // Is a wave-sensitive op + UINT Flags; // INTRIN_FLAG_* flags + UINT MinShaderModel; // Encoded minimum shader model, 0 = no minimum + // (Major << 4) + (Minor & 0xf) INT iOverloadParamIndex; // Parameter decide the overload type, -1 means ret // type UINT uNumArgs; // Count of arguments in pArgs. diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 2d1873fd55..c41e899278 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -1810,12 +1810,21 @@ static void AddHLSLIntrinsicAttr(FunctionDecl *FD, ASTContext &context, } FD->addAttr( HLSLIntrinsicAttr::CreateImplicit(context, tableName, lowering, opcode)); - if (pIntrinsic->bReadNone) + if (pIntrinsic->Flags & INTRIN_FLAG_READ_NONE) FD->addAttr(ConstAttr::CreateImplicit(context)); - if (pIntrinsic->bReadOnly) + if (pIntrinsic->Flags & INTRIN_FLAG_READ_ONLY) FD->addAttr(PureAttr::CreateImplicit(context)); - if (pIntrinsic->bIsWave) + if (pIntrinsic->Flags & INTRIN_FLAG_IS_WAVE) FD->addAttr(HLSLWaveSensitiveAttr::CreateImplicit(context)); + // TBD: Add availability attribute if MinShaderModel is set. + // if (pIntrinsic->MinShaderModel) { + // unsigned Major = pIntrinsic->MinShaderModel >> 4; + // unsigned Minor = pIntrinsic->MinShaderModel & 0xF; + // FD->addAttr(AvailabilityAttr::CreateImplicit( + // context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), + // clang::VersionTuple(), clang::VersionTuple(), false, + // "HLSL Intrinsic availability limited by shader model.")); + //} } static FunctionDecl * diff --git a/tools/clang/unittests/HLSL/ExtensionTest.cpp b/tools/clang/unittests/HLSL/ExtensionTest.cpp index 51dda5533c..65407291ca 100644 --- a/tools/clang/unittests/HLSL/ExtensionTest.cpp +++ b/tools/clang/unittests/HLSL/ExtensionTest.cpp @@ -204,79 +204,86 @@ Intrinsic Intrinsics[] = { {L"test_fn", DEFAULT_NAME, "r", - {1, false, true, false, -1, countof(TestFnArgs), TestFnArgs}}, + {1, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnArgs), TestFnArgs}}, {L"test_proc", DEFAULT_NAME, "r", - {2, false, false, false, -1, countof(TestProcArgs), TestProcArgs}}, + {2, 0, 0, -1, countof(TestProcArgs), TestProcArgs}}, {L"test_poly", "test_poly.$o", "r", - {3, false, true, false, -1, countof(TestFnCustomArgs), TestFnCustomArgs}}, + {3, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnCustomArgs), + TestFnCustomArgs}}, {L"test_int", "test_int", "r", - {4, false, true, false, -1, countof(TestFnIntArgs), TestFnIntArgs}}, + {4, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnIntArgs), TestFnIntArgs}}, {L"test_nolower", "test_nolower.$o", "n", - {5, false, true, false, -1, countof(TestFnNoLowerArgs), + {5, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnNoLowerArgs), TestFnNoLowerArgs}}, {L"test_pack_0", "test_pack_0.$o", "p", - {6, false, false, false, -1, countof(TestFnPack0), TestFnPack0}}, + {6, 0, 0, -1, countof(TestFnPack0), TestFnPack0}}, {L"test_pack_1", "test_pack_1.$o", "p", - {7, false, true, false, -1, countof(TestFnPack1), TestFnPack1}}, + {7, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack1), TestFnPack1}}, {L"test_pack_2", "test_pack_2.$o", "p", - {8, false, true, false, -1, countof(TestFnPack2), TestFnPack2}}, + {8, INTRIN_FLAG_READ_NONE, 0, -1, 
countof(TestFnPack2), TestFnPack2}}, {L"test_pack_3", "test_pack_3.$o", "p", - {9, false, true, false, -1, countof(TestFnPack3), TestFnPack3}}, + {9, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack3), TestFnPack3}}, {L"test_pack_4", "test_pack_4.$o", "p", - {10, false, false, false, -1, countof(TestFnPack4), TestFnPack4}}, + {10, 0, 0, -1, countof(TestFnPack4), TestFnPack4}}, {L"test_rand", "test_rand", "r", - {11, false, false, false, -1, countof(TestRand), TestRand}}, + {11, 0, 0, -1, countof(TestRand), TestRand}}, {L"test_isinf", "test_isinf", "d", - {13, true, true, false, -1, countof(TestIsInf), TestIsInf}}, + {13, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIsInf), TestIsInf}}, {L"test_ibfe", "test_ibfe", "d", - {14, true, true, false, -1, countof(TestIBFE), TestIBFE}}, + {14, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIBFE), TestIBFE}}, // Make this intrinsic have the same opcode as an hlsl intrinsic with an // unsigned counterpart for testing purposes. {L"test_unsigned", "test_unsigned", "n", - {static_cast(hlsl::IntrinsicOp::IOP_min), false, true, false, -1, - countof(TestUnsigned), TestUnsigned}}, + {static_cast(hlsl::IntrinsicOp::IOP_min), INTRIN_FLAG_READ_NONE, + 0, -1, countof(TestUnsigned), TestUnsigned}}, {L"wave_proc", DEFAULT_NAME, "r", - {16, false, true, true, -1, countof(WaveProcArgs), WaveProcArgs}}, + {16, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(WaveProcArgs), WaveProcArgs}}, {L"test_o_1", "test_o_1.$o:1", "r", - {18, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {18, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_2", "test_o_2.$o:2", "r", - {19, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {19, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_3", "test_o_3.$o:3", "r", - {20, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {20, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, // custom lowering with both optional arguments and vector exploding. 
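// Hypothetical entry, not in the test: with the widened table layout, an
// extension intrinsic that wanted to require shader model 6.9 would put the
// encoded version in the field these entries all leave at 0, e.g.
//   {L"test_sm69", "test_sm69", "r",
//    {22, INTRIN_FLAG_READ_NONE, 0x69, -1, countof(TestFnArgs), TestFnArgs}},
// (opcode 22 and the reuse of TestFnArgs are illustrative only). The comment
// lines that follow resume describing the CustomLoadOp lowering arguments.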
// Arg 0 = Opcode // Arg 1 = Pass as is @@ -286,16 +293,17 @@ Intrinsic Intrinsics[] = { {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOp), TestCustomLoadOp}}, + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOp), + TestCustomLoadOp}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpBool), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpBool), TestCustomLoadOpBool}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpSubscript), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpSubscript), TestCustomLoadOpSubscript}}, }; @@ -303,7 +311,8 @@ Intrinsic BufferIntrinsics[] = { {L"MyBufferOp", "MyBufferOp", "m", - {12, false, true, false, -1, countof(TestMyBufferOp), TestMyBufferOp}}, + {12, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyBufferOp), + TestMyBufferOp}}, }; // Test adding a method to an object that normally has no methods (SamplerState @@ -312,7 +321,8 @@ Intrinsic SamplerIntrinsics[] = { {L"MySamplerOp", "MySamplerOp", "m", - {15, false, true, false, -1, countof(TestMySamplerOp), TestMySamplerOp}}, + {15, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMySamplerOp), + TestMySamplerOp}}, }; // Define a lowering string to target a common dxil extension operation defined @@ -345,12 +355,12 @@ Intrinsic Texture1DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_0), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_0), TestMyTexture1DOp_0}}, {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_1), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_1), TestMyTexture1DOp_1}}, }; @@ -358,7 +368,7 @@ Intrinsic Texture2DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture2DOp), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture2DOp), TestMyTexture2DOp}}, }; @@ -1497,8 +1507,8 @@ TEST_F(ExtensionTest, EvalAttributeCollision) { Intrinsic Intrinsic = {L"collide_proc", "collide_proc", "r", - {static_cast(op), true, false, false, -1, - countof(Args), Args}}; + {static_cast(op), INTRIN_FLAG_READ_ONLY, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1532,10 +1542,8 @@ TEST_F(ExtensionTest, NoUnwind) { IA_C}, {"value", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, 1, IA_C}}; - Intrinsic Intrinsic = {L"test_proc", - "test_proc", - "r", - {1, false, false, false, -1, countof(Args), Args}}; + Intrinsic Intrinsic = { + L"test_proc", "test_proc", "r", {1, 0, 0, -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1572,7 +1580,8 @@ TEST_F(ExtensionTest, DCE) { Intrinsic Intrinsic = {L"test_proc", "test_proc", "r", - {1, true, true, false, -1, countof(Args), Args}}; + {1, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 66376c3b9b..1c3fd0f717 100644 --- a/utils/hct/hctdb.py +++ 
b/utils/hct/hctdb.py @@ -8208,6 +8208,7 @@ def __init__( unsigned_op, overload_idx, hidden, + min_shader_model, ): self.name = name # Function name self.idx = idx # Unique number within namespace @@ -8235,6 +8236,12 @@ def __init__( overload_idx # Parameter determines the overload type, -1 means ret type ) self.hidden = hidden # Internal high-level op, not exposed to HLSL + # Encoded minimum shader model for this intrinsic + self.min_shader_model = 0 + if min_shader_model: + self.min_shader_model = (min_shader_model[0] << 4) | ( + min_shader_model[1] & 0x0F + ) self.key = ( ("%3d" % ns_idx) + "!" @@ -8612,6 +8619,7 @@ def process_attr(attr): -1 ) # Parameter determines the overload type, -1 means ret type. hidden = False + min_shader_model = (0, 0) for a in attrs: if a == "": continue @@ -8644,6 +8652,24 @@ def process_attr(attr): if d == "overload": overload_param_index = int(v) continue + if d == "min_sm": + # min_sm is a string like "6.0" or "6.5" + # Convert to a tuple of integers (major, minor) + try: + major_minor = v.split(".") + if len(major_minor) != 2: + raise ValueError + major, minor = major_minor + major = int(major) + minor = int(minor) + # minor of 15 has special meaning, and larger values + # cannot be encoded in the version DWORD. + if major < 0 or minor < 0 or minor > 14: + raise ValueError + min_shader_model = (major, minor) + except ValueError: + assert False, "invalid min_sm: %s" % (v) + continue assert False, "invalid attr %s" % (a) return ( @@ -8654,6 +8680,7 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, ) current_namespace = None @@ -8701,6 +8728,7 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, ) = process_attr(attr) # Add an entry for this intrinsic. if bracket_cleanup_re.search(opts): @@ -8739,6 +8767,7 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, ) ) num_entries += 1 diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 17eefd4918..353f8f9634 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -989,13 +989,23 @@ def get_hlsl_intrinsics(): result += "#ifdef ENABLE_SPIRV_CODEGEN\n\n" # SPIRV Change Ends arg_idx = 0 - ns_table += " {(UINT)%s::%s_%s, %s, %s, %s, %d, %d, g_%s_Args%s},\n" % ( + flags = [] + if i.readonly: + flags.append("INTRIN_FLAG_READ_ONLY") + if i.readnone: + flags.append("INTRIN_FLAG_READ_NONE") + if i.wave: + flags.append("INTRIN_FLAG_IS_WAVE") + if flags: + flags = " | ".join(flags) + else: + flags = "0" + ns_table += " {(UINT)%s::%s_%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( opcode_namespace, id_prefix, i.name, - str(i.readonly).lower(), - str(i.readnone).lower(), - str(i.wave).lower(), + flags, + i.min_shader_model, i.overload_param_index, len(i.params), last_ns, From 24dedfde13cfe6bdcf6206a1ce00bbaf584a90dd Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 14 Mar 2025 02:24:39 -0700 Subject: [PATCH 27/88] [OMM] Implement front end diagnostics for OMM, including on TraceRayInline, and add Availability Attributes (#7156) This PR addresses the front end part of OMM, defining the new flags defined in the spec, and implementing the relevant diagnostics should the flags be incompatible. It also adds the second template argument to the RayQuery object, which is set to have a default value of 0 if no explicit template argument is provided. 
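A rough sketch of the flag rule this enables, not the actual compiler code
(the real checks are in the SemaHLSL.cpp and SemaHLSLDiagnoseTU.cpp diffs of
this patch); ForceOMM2State is 0x400 per DxilConstants.h, while the
RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS value used here is an assumption:

    // Returns true when warn_hlsl_rayquery_flags_conflict should fire.
    static bool ConflictingOMMFlags(unsigned RayFlags, unsigned RayQueryFlags) {
      const unsigned ForceOMM2State = 0x400;       // DXIL::RayFlag::ForceOMM2State
      const unsigned AllowOpacityMicromaps = 0x1;  // assumed RAYQUERY_FLAG_* value
      return (RayFlags & ForceOMM2State) != 0 &&
             (RayQueryFlags & AllowOpacityMicromaps) == 0;
    }

A non-zero RayQueryFlags template argument additionally requires shader model
6.9 or later, which is what warn_hlsl_rayquery_flags_disallowed below reports.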
Fixes #7145 --------- Co-authored-by: github-actions[bot] --- include/dxc/DXIL/DxilConstants.h | 2 +- tools/clang/include/clang/Basic/Attr.td | 7 + .../include/clang/Basic/DiagnosticGroups.td | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 9 + tools/clang/lib/AST/ASTContextHLSL.cpp | 79 +++++--- .../lib/CodeGen/CGHLSLMSFinishCodeGen.cpp | 7 +- tools/clang/lib/Sema/SemaHLSL.cpp | 89 ++++++++- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 171 +++++++++++++++--- .../SemaHLSL/rayquery-ast-dump-implicit.hlsl | 14 ++ .../test/SemaHLSL/rayquery-ast-dump.hlsl | 26 +++ .../rayquery-omm-diag-TU-export-sm65.hlsl | 11 ++ .../rayquery-omm-diag-TU-sm65-warnings.hlsl | 11 ++ .../SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl | 46 +++++ .../test/SemaHLSL/rayquery-omm-diag-sm65.hlsl | 22 +++ .../test/SemaHLSL/rayquery-omm-type-diag.hlsl | 8 + 15 files changed, 447 insertions(+), 57 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index b3c510a038..54131f3948 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1827,7 +1827,7 @@ enum class RayFlag : uint32_t { CullNonOpaque = 0x80, SkipTriangles = 0x100, SkipProceduralPrimitives = 0x200, - ForceOMM2State = 0x400, // Force 2-state in Opacity Micromaps + ForceOMM2State = 0x400 }; // Corresponds to RAYQUERY_FLAG_* in HLSL diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 9e48df51fd..3afbaa91c7 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -1149,6 +1149,13 @@ def HLSLNodeObject : InheritableAttr { }]; } +// HLSL Ray Query Attribute + +def HLSLRayQueryObject : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} // HLSL Parameter Attributes diff --git a/tools/clang/include/clang/Basic/DiagnosticGroups.td b/tools/clang/include/clang/Basic/DiagnosticGroups.td index 39618aed04..ff21b34652 100644 --- a/tools/clang/include/clang/Basic/DiagnosticGroups.td +++ b/tools/clang/include/clang/Basic/DiagnosticGroups.td @@ -799,10 +799,12 @@ def HLSLPayloadAccessQualifer: DiagGroup<"payload-access-qualifier", [ HLSLPayloadAccessQualiferPerf, HLSLPayloadAccessQualiferCall ]>; +def HLSLRayQueryFlags : DiagGroup<"hlsl-rayquery-flags">; def HLSLSemanticIdentifierCollision : DiagGroup<"semantic-identifier-collision">; def HLSLStructurizeExitsLifetimeMarkersConflict: DiagGroup<"structurize-exits-lifetime-markers-conflict">; def HLSLParameterUsage : DiagGroup<"parameter-usage">; def HLSLAvailability: DiagGroup<"hlsl-availability">; +def HLSLAvailabilityConstant: DiagGroup<"hlsl-availability-constant">; def HLSLBarrier : DiagGroup<"hlsl-barrier">; def HLSLLegacyLiterals : DiagGroup<"hlsl-legacy-literal">; // HLSL Change Ends diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 8d428073bd..b8a772b3a8 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7652,8 +7652,17 @@ def err_payload_fields_is_payload_and_overqualified : Error< "payload field '%0' is a payload struct. Payload access qualifiers are not allowed on payload types.">; def warn_hlsl_payload_qualifer_dropped : Warning< "payload access qualifiers ignored. These are only supported for lib_6_7+ targets and lib_6_6 with with the -enable-payload-qualifiers flag.">, InGroup; +def warn_hlsl_rayquery_flags_disallowed : Warning< + "A non-zero value for the RayQueryFlags template argument requires" + " shader model 6.9 or above.">, DefaultError, InGroup; +def warn_hlsl_rayquery_flags_conflict : Warning< + "When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags" + " must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.">, DefaultError, InGroup; def err_hlsl_unsupported_builtin_op: Error< "operator cannot be used with built-in type %0">; +def warn_hlsl_builtin_constant_unavailable: Warning< + "potential misuse of built-in constant %0 in shader model %1; introduced" + " in shader model %2">, InGroup; def err_hlsl_unsupported_char_literal : Error< "unsupported style of char literal - use a single-character char-based literal">; def err_hlsl_unsupported_clipplane_argument_expression : Error< diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 870d032d39..1b6c346acd 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -545,10 +545,19 @@ hlsl::DeclareRecordTypeWithHandle(ASTContext &context, StringRef name, return typeDeclBuilder.getRecordDecl(); } +AvailabilityAttr *ConstructAvailabilityAttribute(clang::ASTContext &context, + VersionTuple Introduced) { + AvailabilityAttr *AAttr = AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(6, 9), + clang::VersionTuple(), clang::VersionTuple(), false, ""); + return AAttr; +} + // creates a global static constant unsigned integer with value. 
// equivalent to: static const uint name = val; static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, - StringRef name, unsigned val) { + StringRef name, unsigned val, + AvailabilityAttr *AAttr = nullptr) { IdentifierInfo &Id = context.Idents.get(name, tok::TokenKind::identifier); QualType type = context.getConstType(context.UnsignedIntTy); VarDecl *varDecl = VarDecl::Create(context, DC, NoLoc, NoLoc, &Id, type, @@ -558,6 +567,9 @@ static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, context, llvm::APInt(context.getIntWidth(type), val), type, NoLoc); varDecl->setInit(exprVal); varDecl->setImplicit(true); + if (AAttr) + varDecl->addAttr(AAttr); + DC->addDecl(varDecl); } @@ -570,6 +582,7 @@ static void AddConstUInt(clang::ASTContext &context, StringRef name, struct Enumerant { StringRef name; unsigned value; + AvailabilityAttr *avail = nullptr; }; static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, @@ -585,33 +598,45 @@ static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, enumDecl->setImplicit(true); // static const uint = ; for (const Enumerant &enumerant : enumerants) { - AddConstUInt(context, curDC, enumerant.name, enumerant.value); + AddConstUInt(context, curDC, enumerant.name, enumerant.value, + enumerant.avail); } } /// Adds all constants and enums for ray tracing void hlsl::AddRaytracingConstants(ASTContext &context) { + + // Create aversion tuple for availability attributes + // for the RAYQUERY_FLAG enum + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "RAY_FLAG", - { - {"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, - {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, - {"RAY_FLAG_FORCE_NON_OPAQUE", - (unsigned)DXIL::RayFlag::ForceNonOpaque}, - {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", - (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, - {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", - (unsigned)DXIL::RayFlag::SkipClosestHitShader}, - {"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, - {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, - {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, - {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, - {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, - {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", - (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, - }); + {{"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, + {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, + {"RAY_FLAG_FORCE_NON_OPAQUE", (unsigned)DXIL::RayFlag::ForceNonOpaque}, + {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", + (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, + {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", + (unsigned)DXIL::RayFlag::SkipClosestHitShader}, + {"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, + {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, + {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, + {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, + {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, + {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", + (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, + {"RAY_FLAG_FORCE_OMM_2_STATE", (unsigned)DXIL::RayFlag::ForceOMM2State, + ConstructAvailabilityAttribute(context, VT69)}}); + + AddTypedefPseudoEnum( + context, "RAYQUERY_FLAG", + 
{{"RAYQUERY_FLAG_NONE", (unsigned)DXIL::RayQueryFlag::None}, + {"RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS", + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)}}); AddTypedefPseudoEnum( context, "COMMITTED_STATUS", @@ -1161,7 +1186,14 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { // template RayQuery { ... } BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), "RayQuery"); - typeDeclBuilder.addIntegerTemplateParam("flags", context.UnsignedIntTy); + typeDeclBuilder.addIntegerTemplateParam("constRayFlags", + context.UnsignedIntTy); + // create an optional second template argument with default value + // that contains the value of DXIL::RayFlag::None + llvm::Optional DefaultRayQueryFlag = + static_cast(DXIL::RayFlag::None); + typeDeclBuilder.addIntegerTemplateParam( + "RayQueryFlags", context.UnsignedIntTy, DefaultRayQueryFlag); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. @@ -1178,7 +1210,8 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { context.DeclarationNames.getCXXConstructorName(canQualType), false, &pConstructorDecl, &pTypeSourceInfo); typeDeclBuilder.getRecordDecl()->addDecl(pConstructorDecl); - + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLRayQueryObjectAttr::CreateImplicit(context)); return typeDeclBuilder.getRecordDecl(); } diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 8af96cc3cd..16f268f102 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -2839,8 +2839,11 @@ void TranslateRayQueryConstructor(HLModule &HLM) { HLM.GetTypeSystem().GetStructAnnotation(pRQType); DXASSERT(SA, "otherwise, could not find type annoation for RayQuery " "specialization"); - DXASSERT(SA->GetNumTemplateArgs() == 1 && - SA->GetTemplateArgAnnotation(0).IsIntegral(), + DXASSERT((SA->GetNumTemplateArgs() == 1 && + SA->GetTemplateArgAnnotation(0).IsIntegral()) || + (SA->GetNumTemplateArgs() == 2 && + SA->GetTemplateArgAnnotation(0).IsIntegral() && + SA->GetTemplateArgAnnotation(1).IsIntegral()), "otherwise, RayQuery has changed, or lacks template args"); llvm::IRBuilder<> Builder(CI); llvm::Value *rayFlags = diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index c41e899278..031e49408f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -3992,13 +3992,6 @@ class HLSLExternalSource : public ExternalSemaSource { return IsSubobjectBasicKind(GetTypeElementKind(type)); } - bool IsRayQueryBasicKind(ArBasicKind kind) { - return kind == AR_OBJECT_RAY_QUERY; - } - bool IsRayQueryType(QualType type) { - return IsRayQueryBasicKind(GetTypeElementKind(type)); - } - void WarnMinPrecision(QualType Type, SourceLocation Loc) { Type = Type->getCanonicalTypeUnqualified(); if (IsVectorType(m_sema, Type) || IsMatrixType(m_sema, Type)) { @@ -5326,6 +5319,39 @@ class HLSLExternalSource : public ExternalSemaSource { return true; } return false; + } else if (Template->getTemplatedDecl() + ->hasAttr()) { + int numArgs = TemplateArgList.size(); + DXASSERT(numArgs == 1 || numArgs == 2, + "otherwise the template has not been declared properly"); + + // first, determine if the rayquery flag AllowOpacityMicromaps is set + bool HasRayQueryFlagAllowOpacityMicromaps = false; + if (numArgs > 1) { + const TemplateArgument &Arg2 = 
TemplateArgList[1].getArgument(); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, m_sema->getASTContext()); + if (Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps) + HasRayQueryFlagAllowOpacityMicromaps = true; + } + + // next, get the first template argument, to check if + // the ForceOMM2State flag is set + const TemplateArgument &Arg1 = TemplateArgList[0].getArgument(); + Expr *Expr1 = Arg1.getAsExpr(); + llvm::APSInt Arg1val; + bool HasRayFlagForceOMM2State = + Expr1->isIntegerConstantExpr(Arg1val, m_sema->getASTContext()) && + (Arg1val.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; + + // finally, if ForceOMM2State is set and AllowOpacityMicromaps + // isn't, emit a warning + if (HasRayFlagForceOMM2State && !HasRayQueryFlagAllowOpacityMicromaps) + m_sema->Diag(TemplateArgList[0].getLocation(), + diag::warn_hlsl_rayquery_flags_conflict); } else if (Template->getTemplatedDecl()->hasAttr()) { DXASSERT(TemplateArgList.size() > 0, "Tessellation patch should have at least one template args"); @@ -11568,6 +11594,52 @@ static void DiagnoseReachableBarrier(Sema &S, CallExpr *CE, } } +bool IsRayFlagForceOMM2StateSet(Sema &sema, const CallExpr *CE) { + const Expr *Expr1 = CE->getArg(1); + llvm::APSInt constantResult; + return Expr1->isIntegerConstantExpr(constantResult, sema.getASTContext()) && + (constantResult.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; +} + +void DiagnoseTraceRayInline(Sema &sema, CallExpr *callExpr) { + // Validate if the RayFlag parameter has RAY_FLAG_FORCE_OMM_2_STATE set, + // the RayQuery decl must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set, + // otherwise emit a diagnostic. + if (IsRayFlagForceOMM2StateSet(sema, callExpr)) { + CXXMemberCallExpr *CXXCallExpr = dyn_cast(callExpr); + if (!CXXCallExpr) { + return; + } + const DeclRefExpr *DRE = + dyn_cast(CXXCallExpr->getImplicitObjectArgument()); + assert(DRE); + QualType QT = DRE->getType(); + auto *typeRecordDecl = QT->getAsCXXRecordDecl(); + ClassTemplateSpecializationDecl *SpecDecl = + llvm::dyn_cast(typeRecordDecl); + + if (!SpecDecl) + return; + + // Guaranteed 2 arguments since the rayquery constructor + // automatically creates 2 template args + DXASSERT(SpecDecl->getTemplateArgs().size() == 2, + "else rayquery constructor template args are not 2"); + llvm::APSInt Arg2val = SpecDecl->getTemplateArgs()[1].getAsIntegral(); + bool IsRayQueryAllowOMMSet = + Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps; + if (!IsRayQueryAllowOMMSet) { + // Diagnose the call + sema.Diag(CXXCallExpr->getExprLoc(), + diag::warn_hlsl_rayquery_flags_conflict); + sema.Diag(DRE->getDecl()->getLocation(), diag::note_previous_decl) + << "RayQueryFlags"; + } + } +} + static bool isStringLiteral(QualType type) { if (!type->isConstantArrayType()) return false; @@ -11612,6 +11684,9 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, DiagnoseReachableBarrier(*this, CE, SM, EntrySK, NodeLaunchTy, EntryDecl, Diags); break; + case hlsl::IntrinsicOp::MOP_TraceRayInline: + DiagnoseTraceRayInline(*this, CE); + break; default: break; } diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index feefd4f625..827798a852 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -10,6 +10,7 @@ 
/////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" #include "clang/AST/ASTContext.h" @@ -18,6 +19,8 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaHLSL.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" #include @@ -142,13 +145,21 @@ class CallGraphWithRecurseGuard { } public: - void BuildForEntry(FunctionDecl *EntryFnDecl) { + void BuildForEntry(FunctionDecl *EntryFnDecl, + llvm::ArrayRef GlobalsWithInit) { DXASSERT_NOMSG(EntryFnDecl); EntryFnDecl = getFunctionWithBody(EntryFnDecl); PendingFunctions pendingFunctions; FnReferenceVisitor visitor(m_visitedFunctions, pendingFunctions, m_callNodes); - pendingFunctions.push_back(EntryFnDecl); + + // First, traverse all initializers, then entry function. + m_visitedFunctions.insert(EntryFnDecl); + visitor.setSourceFn(EntryFnDecl); + for (VarDecl *VD : GlobalsWithInit) + visitor.TraverseDecl(VD); + visitor.TraverseDecl(EntryFnDecl); + while (!pendingFunctions.empty()) { FunctionDecl *pendingDecl = pendingFunctions.pop_back_val(); if (m_visitedFunctions.insert(pendingDecl).second == true) { @@ -284,33 +295,56 @@ std::vector GetAllExportedFDecls(clang::Sema *self) { return AllExportedFDecls; } +void GatherGlobalsWithInitializers( + DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit) { + for (auto *D : DC->decls()) { + // Skip built-ins and function decls. + if (D->isImplicit() || isa(D)) + continue; + if (auto *VD = dyn_cast(D)) { + // Add if user-defined static or groupshared global with initializer. + if (VD->hasInit() && VD->hasGlobalStorage() && + (VD->getStorageClass() == SC_Static || + VD->hasAttr())) + GlobalsWithInit.push_back(VD); + } else if (auto *DC = dyn_cast(D)) { + // Recurse into DeclContexts like namespace, cbuffer, class/struct, etc. + GatherGlobalsWithInitializers(DC, GlobalsWithInit); + } + } +} + // in the non-library case, this function will be run only once, // but in the library case, this function will be run for each // viable top-level function declaration by // ValidateNoRecursionInTranslationUnit. // (viable as in, is exported) -clang::FunctionDecl *ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, - clang::FunctionDecl *FD) { +clang::FunctionDecl * +ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, + clang::FunctionDecl *FD, + llvm::ArrayRef GlobalsWithInit) { // Validate that there is no recursion reachable by this function declaration // NOTE: the information gathered here could be used to bypass code generation // on functions that are unreachable (as an early form of dead code // elimination). 
if (FD) { - callGraph.BuildForEntry(FD); + callGraph.BuildForEntry(FD, GlobalsWithInit); return callGraph.CheckRecursion(FD); } return nullptr; } -class HLSLCallDiagnoseVisitor +class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor : public RecursiveASTVisitor { public: explicit HLSLCallDiagnoseVisitor( Sema *S, const hlsl::ShaderModel *SM, DXIL::ShaderKind EntrySK, DXIL::NodeLaunchType NodeLaunchTy, const FunctionDecl *EntryDecl, - llvm::SmallPtrSetImpl &DiagnosedCalls) + llvm::SmallPtrSetImpl &DiagnosedCalls, + llvm::SmallPtrSetImpl &DeclAvailabilityChecked) : sema(S), SM(SM), EntrySK(EntrySK), NodeLaunchTy(NodeLaunchTy), - EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls) {} + EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls), + DeclAvailabilityChecked(DeclAvailabilityChecked) {} bool VisitCallExpr(CallExpr *CE) { // Set flag if already diagnosed from another entry, allowing some @@ -325,6 +359,86 @@ class HLSLCallDiagnoseVisitor return true; } + bool VisitVarDecl(VarDecl *VD) { + QualType VarType = VD->getType(); + if (const TemplateSpecializationType *TST = + dyn_cast(VarType.getTypePtr())) { + const TemplateDecl *TD = TST->getTemplateName().getAsTemplateDecl(); + if (!TD) + return true; + + // verify this is a rayquery decl + if (TD->getTemplatedDecl()->hasAttr()) { + if (TST->getNumArgs() == 1) { + return true; + } + // now guaranteed 2 args + const TemplateArgument &Arg2 = TST->getArg(1); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, sema->getASTContext()); + + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + sema->getLangOpts().HLSLProfile.c_str()); + + if (Arg2val.getZExtValue() != 0 && !SM->IsSMAtLeast(6, 9)) { + // if it's an integer literal, emit + // warn_hlsl_rayquery_flags_disallowed + if (Arg2.getKind() == TemplateArgument::Expression) { + if (auto *castExpr = dyn_cast( + Arg2.getAsExpr()->IgnoreParens())) { + // Now check if the sub-expression is a DeclRefExpr + Expr *subExpr = castExpr->getSubExpr(); + if (auto *IL = dyn_cast(subExpr)) + sema->Diag(VD->getLocStart(), + diag::warn_hlsl_rayquery_flags_disallowed); + return true; + } + } + } + } + } + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + // Diagnose availability for referenced decl. + if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(DRE)) { + NamedDecl *ND = DRE->getDecl(); + DiagnoseAvailability(AAttr, ND, DRE->getExprLoc()); + } + + return true; + } + + AvailabilityAttr *GetAvailabilityAttrOnce(DeclRefExpr *DRE) { + AvailabilityAttr *AAttr = DRE->getDecl()->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Decl. + if (!DeclAvailabilityChecked.insert(DRE).second) + return nullptr; + + return AAttr; + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + + // if the current shader model is lower than what + // is stated in the availability attribute, emit + // the availability warning. + + if (SMVT < AAttrVT) { + // TBD: Determine best way to distinguish between builtin constant decls + // and other decls. 
+ sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) + << ND << SM->GetName() << AAttrVT.getAsString(); + } + } + clang::Sema *getSema() { return sema; } private: @@ -334,6 +448,7 @@ class HLSLCallDiagnoseVisitor DXIL::NodeLaunchType NodeLaunchTy; const FunctionDecl *EntryDecl; llvm::SmallPtrSetImpl &DiagnosedCalls; + llvm::SmallPtrSetImpl &DeclAvailabilityChecked; }; std::optional @@ -428,18 +543,26 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { const auto *shaderModel = hlsl::ShaderModel::GetByName(self->getLangOpts().HLSLProfile.c_str()); - std::set DiagnosedDecls; + llvm::SmallVector GlobalsWithInit; + GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), + GlobalsWithInit); + + std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; + llvm::SmallPtrSet DeclAvailabilityChecked; // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; - FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl); + ArrayRef InitGlobals = {}; + // if entry function, include globals with initializers. + if (FDecl->hasAttr()) + InitGlobals = GlobalsWithInit; + FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl, InitGlobals); if (result) { // don't emit duplicate diagnostics for the same recursive function // if A and B call recursive function C, only emit 1 diagnostic for C. - if (DiagnosedDecls.find(result) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(result); + if (DiagnosedRecursiveDecls.insert(result).second) { self->Diag(result->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << FDecl->getQualifiedNameAsString() @@ -463,12 +586,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } if (pPatchFnDecl) { - FunctionDecl *patchResult = ValidateNoRecursion(callGraph, pPatchFnDecl); + FunctionDecl *patchResult = + ValidateNoRecursion(callGraph, pPatchFnDecl, GlobalsWithInit); // In this case, recursion was detected in the patch-constant function if (patchResult) { - if (DiagnosedDecls.find(patchResult) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(patchResult); + if (DiagnosedRecursiveDecls.insert(patchResult).second) { self->Diag(patchResult->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << pPatchFnDecl->getQualifiedNameAsString() @@ -482,15 +605,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { // disconnected with respect to the call graph. // Only check this if neither function decl is recursive if (!result && !patchResult) { - CallGraphWithRecurseGuard CG; - CG.BuildForEntry(pPatchFnDecl); - if (CG.CheckReachability(pPatchFnDecl, FDecl)) { + if (callGraph.CheckReachability(pPatchFnDecl, FDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 1 << FDecl->getName() << 0 << pPatchFnDecl->getName(); } - CG.BuildForEntry(FDecl); - if (CG.CheckReachability(FDecl, pPatchFnDecl)) { + if (callGraph.CheckReachability(FDecl, pPatchFnDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 0 << pPatchFnDecl->getName() << 1 << FDecl->getName(); @@ -553,10 +673,13 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } // Visit all visited functions in call graph to collect illegal intrinsic // calls. 
- for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) { - HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, - FDecl, DiagnosedCalls); + HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, + FDecl, DiagnosedCalls, + DeclAvailabilityChecked); + // Visit globals with initializers when processing entry point. + for (VarDecl *VD : InitGlobals) + Visitor.TraverseDecl(VD); + for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) Visitor.TraverseDecl(FD); - } } } diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl new file mode 100644 index 0000000000..55b4623725 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump-implicit %s | FileCheck %s + +float main(RayDesc rayDesc : RAYDESC) : OUT { + return 0; +} + +// CHECK: VarDecl 0x{{.+}} <> implicit RAY_FLAG_FORCE_OMM_2_STATE 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +// CHECK: VarDecl 0x{{.+}} <> implicit RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl new file mode 100644 index 0000000000..2ec79a060f --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump %s | FileCheck %s + +RaytracingAccelerationStructure RTAS; + + +float main(RayDesc rayDesc : RAYDESC) : OUT { + RayQuery<0, RAYQUERY_FLAG_NONE> rayQuery1; + RayQuery rayQuery2; + rayQuery1.TraceRayInline(RTAS, 1, 2, rayDesc); + rayQuery2.TraceRayInline(RTAS, RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + return 0; +} + +// CHECK: -DeclStmt 0x{{.+}} +// CHECK-NEXT: `-VarDecl 0x{{.+}} used rayQuery1 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' 'void ()' +// CHECK-NEXT: -DeclStmt 0x{{.+}} +// CHECK-NEXT: `-VarDecl 0x{{.+}} used rayQuery2 'RayQuery':'RayQuery<1024, 1>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' 'void ()' +// CHECK-NEXT: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' lvalue Var 0x{{.+}} 'rayQuery1' 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' + +// CHECK: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' lvalue Var 0x{{.+}} 'rayQuery2' 'RayQuery':'RayQuery<1024, 1>' diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl new file mode 100644 index 0000000000..3e2031e0a7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T lib_6_5 -verify %s + +// expect no diagnostics here, since global variables +// are not picked up through the recursive AST visitor's +// traversal of the exported function. 
+int x = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +export float4 MyExportedFunction(float4 color) { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_5; introduced in shader model 6.9}} + return color * RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl new file mode 100644 index 0000000000..476c1a503e --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -Wno-error-hlsl-rayquery-flags -Wno-error-hlsl-availability -T vs_6_5 -E main -verify %s + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + // expected-warning@+3{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0a; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl new file mode 100644 index 0000000000..6904f58c7d --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// tests that RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS usage will emit +// one warning for each incompatible availability attribute decl, +// when the compilation target is less than shader model 6.9. 
+ +namespace MyNamespace { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + static const int badVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +// expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} +groupshared const int otherBadVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +int retNum(){ + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +int retNumUncalled(){ + // no diagnostic expected here + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + int x = MyNamespace::badVar + otherBadVar + retNum(); + RayQuery<0> rayQuery0a; + + if (x > 4){ + rayQuery0a.TraceRayInline(RTAS, 8, 2, rayDesc); + } + else{ + rayQuery0a.TraceRayInline(RTAS, 16, 2, rayDesc); + } + + // expected-error@+2{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0b; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0d; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl new file mode 100644 index 0000000000..d31d9bf289 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// Test that at the call site of any TraceRayInline call, a default error +// warning is emitted that indicates the ray query object has the +// RAY_FLAG_FORCE_OMM_2_STATE set, but doesn't have +// RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set + +RaytracingAccelerationStructure RTAS; +void main(RayDesc rayDesc : RAYDESC) : OUT { + // expected-note@+1 2 {{RayQueryFlags declared here}} + RayQuery<0> rayQuery; // implicitly, the second arg is 0. 
+ + // expected-error@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + rayQuery.TraceRayInline(RTAS, 1024, 2, rayDesc); + + // expected-error@+1{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + RayQuery<0, 1> rayQueryInvalid; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl new file mode 100644 index 0000000000..981788a688 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 -E RayQueryTests -verify %s +// RUN: %dxc -T vs_6_5 -E RayQueryTests2 -verify %s + +// validate 2nd template argument flags +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef RayQuery BadRayQuery; +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef RayQuery BadRayQuery2; From ebc8c5cd7eb8fc191b1fd8e76ccdd388a52c85bc Mon Sep 17 00:00:00 2001 From: Lumina Date: Mon, 17 Mar 2025 22:55:49 +0800 Subject: [PATCH 28/88] [NFC][Doc] Update HLSL to SPIR-V document (#7204) This is all about updating urls and tables in docs/SPIR-V.rst. I'm currently working on a demo & tutorial about how to work with Vulkan+HLSL+dxc toolchain so I made this PR. Thanks a lot for your contribution to the ecosystem, and glad to see your feedback! Signed-off-by: lumina37 --- docs/SPIR-V.rst | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index c30286e4e6..072a2fe9c1 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -282,7 +282,7 @@ Right now the following ```` are supported: Need ``SPV_KHR_device_group`` extension. * ``ViewportMaskNV``: The GLSL equivalent is ``gl_ViewportMask``. -Please see Vulkan spec. `14.6. Built-In Variables `_ +Please see Vulkan spec. `15.9. Built-In Variables `_ for detailed explanation of these builtins. Supported extensions @@ -446,7 +446,7 @@ environment (hence SPIR-V version) and SPIR-V extension control: ``-fspv-target-env=`` accepts a Vulkan target environment (see ``-help`` for supported values). If such an option is not given, the CodeGen defaults to ``vulkan1.0``. When targeting ``vulkan1.0``, trying to use features that are only -available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsics`_, +available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsic `_, will trigger a compiler error. If ``-fspv-extension=`` is not specified, the CodeGen will select suitable @@ -494,7 +494,7 @@ Specifically, we need to legalize the following HLSL source code patterns: Legalization transformations will not run unless the above patterns are encountered in the source code. 
-For more details, please see the `SPIR-V cookbook `_, +For more details, please see the `SPIR-V cookbook `_, which contains examples of what HLSL code patterns will be accepted and generate valid SPIR-V for Vulkan. @@ -561,7 +561,7 @@ So if you want to run loop unrolling additionally after the default optimization recipe, you can specify ``-Oconfig=-O,--loop-unroll``. For the whole list of accepted passes and details about each one, please see -``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. +``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. Validation ~~~~~~~~~~ @@ -640,7 +640,7 @@ HLSL Semantic HLSL semantic strings are by default not emitted into the SPIR-V binary module. If you need them, by specifying ``-fspv-reflect``, the compiler will use -the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ +the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ extension to emit them. HLSL User Types @@ -661,7 +661,7 @@ Counter buffers for RW/Append/Consume StructuredBuffer The association between a counter buffer and its main RW/Append/Consume StructuredBuffer is conveyed by ``OpDecorateId HLSLCounterBufferGOOGLE `` instruction from the -`SPV_GOOGLE_hlsl_funtionality1 `_ +`SPV_GOOGLE_hlsl_funtionality1 `_ extension. This information is by default missing; you need to specify ``-fspv-reflect`` to direct the compiler to emit them. @@ -911,7 +911,7 @@ For example, RWTexture2D Tex2; // Works like before -``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. +``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. The following table lists the mapping between ``FORMAT`` of ``[[vk::image_format("FORMAT")]]`` and its corresponding SPIR-V Image Format. @@ -994,7 +994,7 @@ Please see the following sections for the details of each type. As a summary: =========================== ================== ================================ ==================== ================= To know more about the Vulkan buffer types, please refer to the Vulkan spec -`13.1 Descriptor Types `_. +`14.1 Descriptor Types `_. Memory layout rules ~~~~~~~~~~~~~~~~~~~ @@ -1004,7 +1004,7 @@ right now: 1. Vector-relaxed OpenGL ``std140`` for uniform buffers and vector-relaxed OpenGL ``std430`` for storage buffers: these rules satisfy Vulkan `"Standard - Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, + Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, respectively. They are the default. 2. DirectX memory layout rules for uniform buffers and storage buffers: @@ -1027,7 +1027,7 @@ In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL alignment: 1. The alignment of a vector type is set to be the alignment of its element type -2. If the above causes an `improper straddle `_, +2. If the above causes an `improper straddle `_, the alignment will be set to 16 bytes. As an exmaple, for the following HLSL definition: @@ -1471,7 +1471,7 @@ Without hints from the developer, the compiler will try its best to map semantics to ``Location`` numbers. However, there is no single rule for this mapping; semantic strings should be handled case by case. 
-Firstly, under certain `SigPoints `_, +Firstly, under certain `SigPoints `_, some system-value (SV) semantic strings will be translated into SPIR-V ``BuiltIn`` decorations: @@ -1655,7 +1655,7 @@ some system-value (SV) semantic strings will be translated into SPIR-V | +-------------+----------------------------------------+-----------------------+-----------------------------+ | | MSOut | ``PrimitiveShadingRateKHR`` | N/A | ``FragmentShadingRate`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ -| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT `` | +| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ @@ -3596,8 +3596,8 @@ Mesh and Amplification Shaders | Amplification shaders corresponds to Task Shaders in Vulkan. | | Refer to following HLSL and SPIR-V specs for details: -| https://docs.microsoft.com/ -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc +| https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_mesh_shader.asciidoc | | This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan. @@ -3704,8 +3704,8 @@ Raytracing in Vulkan and SPIRV | SPIR-V codegen is currently supported for NVIDIA platforms via SPV_NV_ray_tracing extension or | on other platforms via provisional cross vendor SPV_KHR_ray_tracing extension. | SPIR-V specification for reference: -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_ray_tracing.asciidoc -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_tracing.asciidoc | Vulkan ray tracing samples: | https://developer.nvidia.com/rtx/raytracing/vkray @@ -3868,7 +3868,7 @@ Ray Query in SPIRV ~~~~~~~~~~~~~~~~~~ RayQuery SPIR-V codegen is currently supported via SPV_KHR_ray_query extension SPIR-V specification for reference: -https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_query.asciidoc +https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_query.asciidoc Object Type ~~~~~~~~~~~ @@ -4081,7 +4081,7 @@ This intrinsic funcion has the following signature: uint64_t ReadClock(in uint scope); -It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. +It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. One can use the predefined scopes in the ``vk`` namepsace to specify the scope argument. For example: @@ -4091,11 +4091,11 @@ For example: RawBufferLoad and RawBufferStore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Vulkan extension `VK_KHR_buffer_device_address `_ +The Vulkan extension `VK_KHR_buffer_device_address `_ supports getting the 64-bit address of a buffer and passing it to SPIR-V as a Uniform buffer. SPIR-V can use the address to load and store data without a descriptor. 
We add the following intrinsic functions to expose a subset of the -`VK_KHR_buffer_device_address `_ +`VK_KHR_buffer_device_address `_ and `SPV_KHR_physical_storage_buffer `_ functionality to HLSL: From 909c552458acdcfeddf077e790d419a15ca8a3b6 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 17 Mar 2025 08:57:54 -0700 Subject: [PATCH 29/88] Create new raw buffer load lowering function (#7144) Disentangles the raw, structured, and typed buffer lowering implementations into an isolated function. Alters the various places that lowering took place to call into the common function. The Load lowering takes place in a few phases now. The basic information about the load is gathered as part of the ResLoadHelper constructor. One variant extracts most of this information from a call instruction. The other sets a lot of things such as offsets more explicitly, usually for subscripted or matrix loads. The helper is used to assemble call instruction arguments appropriate for the call. The call is issued possibly repeatedly for raw buffers of types greater than 4 elements. The results are then packaged and converted from memory storage type into a vector of register types. When raw buffers use a templated load with a struct, they reuse the subscript path also used for subscripted structured buffers. Such loads with structs containing vectors or matrices will invoke the load lowering from within this recursive call that traverses GEPs and other users of the original call to set up correct offsets etc. This adapts that code to use the common load lowering that enables long vectors within structs to be correctly loaded. This requires the ability to override the type used by the resloadhelper explicitly, so a member is added to accommodate the matrices vector representation that doesn't match the types of the load call. This also requires removing the bufIdx and offset swapping that was done, confusingly throughout the TranslateStructBufSubscriptUser code to account for the fact that byte address buffers have to represent offsets using the main coord parameter in favor of passing the Resource Kind down such that the right parameter can receive the incrementation when necessary for longer types such as matrices. This is enabled also by adding ResKind appropriate offset calculation in the ResLoadHelper. ResLoadHelper also gets an opcode set based on the ResKind for both overloads in preparation for further expansion to different resource kinds. Adds filecheck, verify, and IR pass tests. 
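As a rough illustration of the kind of source that now exercises the shared
path (a sketch with made-up names and offsets, assuming a target that
accepts 7-wide vectors; it is not copied from the new tests):

    ByteAddressBuffer InBuf;
    RWByteAddressBuffer OutBuf;

    [numthreads(8, 1, 1)]
    void main(uint ix : SV_GroupIndex) {
      // With this change, a 7-element load from a raw buffer is split into
      // two rawBufferLoad calls (a 4-element chunk followed by a 3-element
      // chunk), and the pieces are reassembled into one 7-wide register
      // vector.
      vector<float, 7> v = InBuf.Load<vector<float, 7> >(ix * 28);
      OutBuf.Store<vector<float, 7> >(ix * 28, v);
    }

The templated load above, like subscripted structured-buffer loads, is
meant to funnel into the common lowering, so the chunking and the
memory-to-register conversion happen in one place.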
Lays groundwork for #7118 --- lib/HLSL/HLOperationLower.cpp | 765 ++++++++---------- .../intrinsics/buffer-agg-load-stores.hlsl | 36 +- .../buffer-load-stores-scalars.hlsl | 162 ++++ .../hlsl/intrinsics/buffer-load-stores.hlsl | 189 ++++- .../hlsl/intrinsics/buffer-load.hlsl | 152 ++++ .../hlsl/intrinsics/buffer-load.ll | 404 +++++++++ .../hlsl/intrinsics/buffer-typed-load.hlsl | 112 +++ .../hlsl/intrinsics/buffer-typed-load.ll | 346 ++++++++ 8 files changed, 1722 insertions(+), 444 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index bc293357d6..9c3ad76b92 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -2985,23 +2985,6 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, return retVal; } -static Value *ScalarizeElements(Type *RetTy, ArrayRef Elts, - IRBuilder<> &Builder) { - // Extract value part. - Value *retVal = llvm::UndefValue::get(RetTy); - if (RetTy->isVectorTy()) { - unsigned vecSize = RetTy->getVectorNumElements(); - DXASSERT(vecSize <= Elts.size(), "vector size mismatch"); - for (unsigned i = 0; i < vecSize; i++) { - Value *retComp = Elts[i]; - retVal = Builder.CreateInsertElement(retVal, retComp, i); - } - } else { - retVal = Elts[0]; - } - return retVal; -} - void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, hlsl::OP *hlslOp) { if (status && !isa(status)) { @@ -3941,14 +3924,36 @@ TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // Load/Store intrinsics. +OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) { + switch (RK) { + case DxilResource::Kind::RawBuffer: + case DxilResource::Kind::StructuredBuffer: + return OP::OpCode::RawBufferLoad; + case DxilResource::Kind::TypedBuffer: + return OP::OpCode::BufferLoad; + case DxilResource::Kind::Invalid: + DXASSERT(0, "invalid resource kind"); + break; + default: + return OP::OpCode::TextureLoad; + } + return OP::OpCode::TextureLoad; +} + struct ResLoadHelper { + // Default constructor uses CI load intrinsic call + // to get the retval and various location indicators. ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, - Value *h, IntrinsicOp IOP, bool bForSubscript = false); - // For double subscript. - ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip) - : opcode(OP::OpCode::TextureLoad), - intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst), - addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {} + Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr); + // Alternative constructor explicitly sets the index. + // Used for some subscript operators that feed the generic HL call inst + // into a load op and by the matrixload call instruction. 
+ ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, + Value *Offset, Value *mip = nullptr) + : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), + addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + opcode = LoadOpFromResKind(RK); + } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; unsigned dxilMajor; @@ -3961,122 +3966,85 @@ struct ResLoadHelper { Value *mipLevel; }; +// Uses CI arguments to determine the index, offset, and mipLevel also depending +// on the RK/RC resource kind and class, which determine the opcode. +// Handle and IOP are set explicitly. +// For typed buffer loads, the call instruction feeds into a load +// represented by TyBufSubLoad which determines the instruction to replace. +// Otherwise, CI is replaced. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, Value *hdl, - IntrinsicOp IOP, bool bForSubscript) + IntrinsicOp IOP, LoadInst *TyBufSubLoad) : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) { - switch (RK) { - case DxilResource::Kind::RawBuffer: - case DxilResource::Kind::StructuredBuffer: - opcode = OP::OpCode::RawBufferLoad; - break; - case DxilResource::Kind::TypedBuffer: - opcode = OP::OpCode::BufferLoad; - break; - case DxilResource::Kind::Invalid: - DXASSERT(0, "invalid resource kind"); - break; - default: - opcode = OP::OpCode::TextureLoad; - break; - } - retVal = CI; + opcode = LoadOpFromResKind(RK); + bool bForSubscript = false; + if (TyBufSubLoad) { + bForSubscript = true; + retVal = TyBufSubLoad; + } else + retVal = CI; const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx; addr = CI->getArgOperand(kAddrIdx); unsigned argc = CI->getNumArgOperands(); + Type *i32Ty = Type::getInt32Ty(CI->getContext()); + unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; + unsigned OffsetIdx = HLOperandIndex::kInvalidIdx; if (opcode == OP::OpCode::TextureLoad) { - // mip at last channel - unsigned coordSize = DxilResource::GetNumCoords(RK); - - if (RC == DxilResourceBase::Class::SRV) { - if (bForSubscript) { - // Use 0 when access by []. - mipLevel = IRBuilder<>(CI).getInt32(0); - } else { - if (coordSize == 1 && !addr->getType()->isVectorTy()) { - // Use addr when access by Load. - mipLevel = addr; - } else { - mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize); - } - } - } else { - // Set mip level to undef for UAV. - mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext())); - } - - if (RC == DxilResourceBase::Class::SRV) { - unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx; - unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx; - if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) { - offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx; - statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + bool IsMS = (RK == DxilResource::Kind::Texture2DMS || + RK == DxilResource::Kind::Texture2DMSArray); + // Set mip and status index. + offset = UndefValue::get(i32Ty); + if (IsMS) { + // Retrieve appropriate MS parameters. + StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + // MS textures keep the sample param (mipLevel) regardless of writability. 
+ if (bForSubscript) + mipLevel = ConstantInt::get(i32Ty, 0); + else mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - } - - if (argc > offsetIdx) - offset = CI->getArgOperand(offsetIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - } else if (RC == DxilResourceBase::Class::UAV && - (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray)) { - unsigned statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; - mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - + } else if (RC == DxilResourceBase::Class::UAV) { + // DXIL requires that non-MS UAV accesses set miplevel to undef. + mipLevel = UndefValue::get(i32Ty); + StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; } else { - const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; - - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); + // Non-MS SRV case. + StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx; + if (bForSubscript) + // Having no miplevel param, single subscripted SRVs default to 0. + mipLevel = ConstantInt::get(i32Ty, 0); + else + // Mip is stored at the last channel of the coordinate vector. + mipLevel = IRBuilder<>(CI).CreateExtractElement( + addr, DxilResource::GetNumCoords(RK)); } - } else { - const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); - } + if (RC == DxilResourceBase::Class::SRV) + OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx + : HLOperandIndex::kTexLoadOffsetOpIdx; + } + + // Set offset. + if (DXIL::IsStructuredBuffer(RK)) + // Structured buffers receive no exterior offset in this constructor, + // but may need to increment it later. + offset = ConstantInt::get(i32Ty, 0U); + else if (argc > OffsetIdx) + // Textures may set the offset from an explicit argument. + offset = CI->getArgOperand(OffsetIdx); + else + // All other cases use undef. + offset = UndefValue::get(i32Ty); + + // Retrieve status value if provided. 
+ if (argc > StatusIdx) + status = CI->getArgOperand(StatusIdx); } void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL); -// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi } -void Make64bitResultForLoad(Type *EltTy, ArrayRef resultElts32, - unsigned size, MutableArrayRef resultElts, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - if (EltTy == doubleTy) { - Function *makeDouble = - hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy); - Value *makeDoubleOpArg = - Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); - resultElts[i] = V; - } - } else { - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - lo = Builder.CreateZExt(lo, i64Ty); - hi = Builder.CreateZExt(hi, i64Ty); - hi = Builder.CreateShl(hi, 32); - resultElts[i] = Builder.CreateOr(lo, hi); - } - } -} - static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) { unsigned mask = 0; @@ -4108,183 +4076,194 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment); -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned VecElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy = false); - -void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK, - IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) { - - Type *Ty = helper.retVal->getType(); - if (Ty->isPointerTy()) { - DXASSERT(!DxilResource::IsAnyTexture(RK), - "Textures should not be treated as structured buffers."); - TranslateStructBufSubscript(cast(helper.retVal), helper.handle, - helper.status, OP, RK, DL); - return; - } - +// Sets up arguments for buffer load call. +static SmallVector GetBufLoadArgs(ResLoadHelper helper, + HLResource::Kind RK, + IRBuilder<> Builder, Type *EltTy, + unsigned LdSize) { OP::OpCode opcode = helper.opcode; + llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); - Type *i32Ty = Builder.getInt32Ty(); - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - Type *EltTy = Ty->getScalarType(); - unsigned numComponents = 1; - if (Ty->isVectorTy()) { - numComponents = Ty->getVectorNumElements(); - } - - if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) { - std::vector bufLds; - const bool isBool = EltTy->isIntegerTy(1); + unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U; + alignment = std::min(alignment, LdSize); + Constant *alignmentVal = Builder.getInt32(alignment); - // Bool are represented as i32 in memory - Type *MemReprTy = isBool ? 
Builder.getInt32Ty() : EltTy;
-    bool isScalarTy = !Ty->isVectorTy();
-
-    Value *retValNew = nullptr;
-    if (DXIL::IsStructuredBuffer(RK)) {
-      retValNew = TranslateRawBufVecLd(
-          MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
-          helper.addr, OP->GetU32Const(0), DL, bufLds,
-          /*baseAlign (in bytes)*/ 8, isScalarTy);
-    } else {
-      retValNew =
-          TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle,
-                               OP, helper.status, nullptr, helper.addr, DL,
-                               bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy);
-    }
+  // Assemble args specific to the type bab/struct/typed:
+  // - Typed needs to handle the possibility of vector coords
+  // - Raws need to calculate alignment and mask values.
+  SmallVector Args;
+  Args.emplace_back(opArg);         // opcode @0.
+  Args.emplace_back(helper.handle); // Resource handle @1
-    DXASSERT_NOMSG(!bufLds.empty());
-    dxilutil::MigrateDebugValue(helper.retVal, bufLds.front());
+  // Set offsets appropriate for the load operation.
+  bool isVectorAddr = helper.addr->getType()->isVectorTy();
+  if (opcode == OP::OpCode::TextureLoad) {
+    llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty());
-    if (isBool) {
-      // Convert result back to register representation.
-      retValNew = Builder.CreateICmpNE(
-          retValNew, Constant::getNullValue(retValNew->getType()));
+    // Set mip level or sample for MS textures @2.
+    Args.emplace_back(helper.mipLevel);
+    // Set texture coords according to resource kind @3-5
+    // Coords unused by the resource kind are undefs.
+    unsigned coordSize = DxilResource::GetNumCoords(RK);
+    for (unsigned i = 0; i < 3; i++)
+      if (i < coordSize)
+        Args.emplace_back(isVectorAddr
+                              ? Builder.CreateExtractElement(helper.addr, i)
+                              : helper.addr);
+      else
+        Args.emplace_back(undefI);
+
+    // Set texture offsets according to resource kind @6-8
+    // Offsets unused by the resource kind are undefs.
+    unsigned offsetSize = DxilResource::GetNumOffsets(RK);
+    if (!helper.offset || isa(helper.offset))
+      offsetSize = 0;
+    for (unsigned i = 0; i < 3; i++)
+      if (i < offsetSize)
+        Args.emplace_back(Builder.CreateExtractElement(helper.offset, i));
+      else
+        Args.emplace_back(undefI);
+  } else {
+    // If not TextureLoad, it could be a typed or raw buffer load.
+    // They have mostly similar arguments.
+    DXASSERT(opcode == OP::OpCode::RawBufferLoad ||
+                 opcode == OP::OpCode::BufferLoad,
+             "Wrong opcode in get load args");
+    Args.emplace_back(
+        isVectorAddr ? Builder.CreateExtractElement(helper.addr, (uint64_t)0)
+                     : helper.addr);
+    Args.emplace_back(helper.offset);
+    if (opcode == OP::OpCode::RawBufferLoad) {
+      // Unlike typed buffer load, raw buffer load has mask and alignment.
+      Args.emplace_back(nullptr);      // Mask will be added later @4.
+      Args.emplace_back(alignmentVal); // alignment @5.
     }
-
-    helper.retVal->replaceAllUsesWith(retValNew);
-    helper.retVal = retValNew;
-    return;
-  }
-
-  bool isTyped = opcode == OP::OpCode::TextureLoad ||
-                 RK == DxilResource::Kind::TypedBuffer;
-  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
-  if (is64 && isTyped) {
-    EltTy = i32Ty;
-  }
-  bool isBool = EltTy->isIntegerTy(1);
-  if (isBool) {
-    // Value will be loaded in its memory representation. 
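// For reference, GetBufLoadArgs fills the dx.op.textureLoad operands in fixed
// slots; the enum below is an illustrative restatement of that layout (not
// DXC code), with unused coordinate and offset slots receiving undef.
enum TextureLoadArgSlot : unsigned {
  kTexLdOpcode = 0,      // @0 opcode constant
  kTexLdHandle = 1,      // @1 resource handle
  kTexLdMipOrSample = 2, // @2 mip level, or sample index for MS textures
  kTexLdCoord0 = 3,      // @3-5 coordinates
  kTexLdCoord1 = 4,
  kTexLdCoord2 = 5,
  kTexLdOffset0 = 6,     // @6-8 offsets
  kTexLdOffset1 = 7,
  kTexLdOffset2 = 8,
  kTexLdNumArgs = 9
};
static_assert(kTexLdOffset0 == kTexLdCoord2 + 1, "offsets follow the coords");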
- EltTy = i32Ty; - if (Ty->isVectorTy()) - Ty = VectorType::get(EltTy, numComponents); } + return Args; +} - Function *F = OP->GetOpFunc(opcode, EltTy); - llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); - - llvm::Value *undefI = llvm::UndefValue::get(i32Ty); +// Emits as many calls as needed to load the full vector +// Performs any needed extractions and conversions of the results. +Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, + IRBuilder<> &Builder, hlsl::OP *OP, + const DataLayout &DL) { + OP::OpCode opcode = helper.opcode; + Type *Ty = helper.retVal->getType(); - SmallVector loadArgs; - loadArgs.emplace_back(opArg); // opcode - loadArgs.emplace_back(helper.handle); // resource handle + unsigned NumComponents = 1; + if (Ty->isVectorTy()) + NumComponents = Ty->getVectorNumElements(); - if (opcode == OP::OpCode::TextureLoad) { - // set mip level - loadArgs.emplace_back(helper.mipLevel); - } + const bool isTyped = DXIL::IsTyped(RK); + Type *EltTy = Ty->getScalarType(); + const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()); + const bool isBool = EltTy->isIntegerTy(1); + // Values will be loaded in memory representations. + if (isBool || (is64 && isTyped)) + EltTy = Builder.getInt32Ty(); - if (opcode == OP::OpCode::TextureLoad) { - // texture coord - unsigned coordSize = DxilResource::GetNumCoords(RK); - bool isVectorAddr = helper.addr->getType()->isVectorTy(); - for (unsigned i = 0; i < 3; i++) { - if (i < coordSize) { - loadArgs.emplace_back(isVectorAddr - ? Builder.CreateExtractElement(helper.addr, i) - : helper.addr); - } else - loadArgs.emplace_back(undefI); + // 64-bit types are stored as int32 pairs in typed buffers. + if (is64 && isTyped) { + DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); + NumComponents *= 2; + } + + unsigned LdSize = DL.getTypeAllocSize(EltTy); + + SmallVector Elts(NumComponents); + + SmallVector Args = + GetBufLoadArgs(helper, RK, Builder, EltTy, LdSize); + + // Keep track of the first load for debug info migration. + Value *FirstLd = nullptr; + + unsigned OffsetIdx = 0; + if (RK == DxilResource::Kind::RawBuffer) + // Raw buffers can't use offset param. Add to coord index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + // Create calls to function object. + // Typed buffer loads are limited to one load of up to 4 32-bit values. + // Raw buffer loads might need multiple loads in chunks of 4. + for (unsigned i = 0; i < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned chunkSize = std::min(NumComponents - i, 4U); + + // Assign mask for raw buffer loads. + if (opcode == OP::OpCode::RawBufferLoad) { + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + GetRawBufferMaskForETy(EltTy, chunkSize, OP); + // If we've loaded a chunk already, update offset to next chunk. 
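// The chunking above can be summarized with a small standalone model (plain
// C++, illustrative only): a RawBufferLoad returns at most four components,
// so an N-component value becomes ceil(N/4) calls, each with a low-bit mask
// for the components it requests and an offset bumped by four elements per
// subsequent call.
#include <algorithm>
#include <cstdint>
#include <vector>

struct LoadChunkPlan {
  unsigned componentCount; // 1-4 components returned by this call
  uint8_t mask;            // low bits set, one per requested component
  unsigned byteOffset;     // bytes added to the caller-provided offset
};

// "eltSize" plays the role of LdSize above (per-component size in bytes).
inline std::vector<LoadChunkPlan> PlanRawBufferLoads(unsigned numComponents,
                                                     unsigned eltSize) {
  std::vector<LoadChunkPlan> plan;
  unsigned byteOffset = 0;
  for (unsigned i = 0; i < numComponents; i += 4) {
    unsigned count = std::min(numComponents - i, 4u);
    plan.push_back(
        {count, static_cast<uint8_t>((1u << count) - 1u), byteOffset});
    byteOffset += 4 * eltSize; // the next chunk starts four elements later
  }
  return plan;
}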
+ if (FirstLd != nullptr && opcode == OP::OpCode::RawBufferLoad) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); } - } else { - if (helper.addr->getType()->isVectorTy()) { - Value *scalarOffset = - Builder.CreateExtractElement(helper.addr, (uint64_t)0); - - // TODO: calculate the real address based on opcode - loadArgs.emplace_back(scalarOffset); // offset - } else { - // TODO: calculate the real address based on opcode - - loadArgs.emplace_back(helper.addr); // offset - } - } - // offset 0 - if (opcode == OP::OpCode::TextureLoad) { - if (helper.offset && !isa(helper.offset)) { - unsigned offsetSize = DxilResource::GetNumOffsets(RK); - for (unsigned i = 0; i < 3; i++) { - if (i < offsetSize) - loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i)); - else - loadArgs.emplace_back(undefI); + Function *F = OP->GetOpFunc(opcode, EltTy); + Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); + + // Extract elements from returned ResRet. + for (unsigned j = 0; j < chunkSize; j++, i++) + Elts[i] = Builder.CreateExtractValue(Ld, j); + + // Update status. + UpdateStatus(Ld, helper.status, Builder, OP); + + if (!FirstLd) + FirstLd = Ld; + } + DXASSERT(FirstLd, "No loads created by TranslateBufLoad"); + + // Convert loaded 32-bit integers to intended 64-bit type representation. + if (isTyped) { + Type *RegEltTy = Ty->getScalarType(); + if (RegEltTy->isDoubleTy()) { + Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy); + Value *makeDoubleOpArg = + Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); + NumComponents /= 2; // Convert back to number of doubles. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); } - } else { - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); + EltTy = RegEltTy; + } else if (RegEltTy->isIntegerTy(64)) { + NumComponents /= 2; // Convert back to number of int64s. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + lo = Builder.CreateZExt(lo, RegEltTy); + hi = Builder.CreateZExt(hi, RegEltTy); + hi = Builder.CreateShl(hi, 32); + Elts[i] = Builder.CreateOr(lo, hi); + } + EltTy = RegEltTy; } } - // Offset 1 - if (RK == DxilResource::Kind::TypedBuffer) { - loadArgs.emplace_back(undefI); - } - - Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode)); - dxilutil::MigrateDebugValue(helper.retVal, ResRet); - + // Package elements into a vector. Value *retValNew = nullptr; - if (!is64 || !isTyped) { - retValNew = ScalarizeResRet(Ty, ResRet, Builder); + if (!Ty->isVectorTy()) { + retValNew = Elts[0]; } else { - unsigned size = numComponents; - DXASSERT(size <= 2, "typed buffer only allow 4 dwords"); - EltTy = Ty->getScalarType(); - Value *Elts[2]; - - Make64bitResultForLoad(Ty->getScalarType(), - { - Builder.CreateExtractValue(ResRet, 0), - Builder.CreateExtractValue(ResRet, 1), - Builder.CreateExtractValue(ResRet, 2), - Builder.CreateExtractValue(ResRet, 3), - }, - size, Elts, OP, Builder); - - retValNew = ScalarizeElements(Ty, Elts, Builder); + retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned i = 0; i < NumComponents; i++) + retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i); } - if (isBool) { - // Convert result back to register representation. 
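// The memory-to-register conversions performed here have simple scalar
// equivalents, sketched below in plain C++ (illustrative only; MakeDouble is
// modeled with a bit copy rather than the DXIL op itself):
#include <cstdint>
#include <cstring>

// Typed-buffer loads return 64-bit values as lo/hi i32 pairs.
inline uint64_t MakeUInt64(uint32_t lo, uint32_t hi) {
  return static_cast<uint64_t>(lo) | (static_cast<uint64_t>(hi) << 32);
}

inline double MakeDoubleFromHalves(uint32_t lo, uint32_t hi) {
  uint64_t bits = MakeUInt64(lo, hi);
  double d;
  std::memcpy(&d, &bits, sizeof d); // reinterpret the assembled bit pattern
  return d;
}

// Bools are stored as i32 in memory and compared against zero on load.
inline bool BoolFromMemory(uint32_t stored) { return stored != 0; }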
+ // Convert loaded int32 bool results to i1 register representation. + if (isBool) retValNew = Builder.CreateICmpNE( retValNew, Constant::getNullValue(retValNew->getType())); - } - // replace helper.retVal->replaceAllUsesWith(retValNew); - // Save new ret val. helper.retVal = retValNew; - // get status - UpdateStatus(ResRet, helper.status, Builder, OP); + + return FirstLd; } Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -4292,6 +4271,7 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; + DataLayout &DL = helper.dataLayout; Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); IRBuilder<> Builder(CI); @@ -4299,9 +4279,19 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, DXIL::ResourceClass RC = pObjHelper->GetRC(handle); DXIL::ResourceKind RK = pObjHelper->GetRK(handle); - ResLoadHelper loadHelper(CI, RK, RC, handle, IOP); - TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout); - // CI is replaced in TranslateLoad. + ResLoadHelper ldHelper(CI, RK, RC, handle, IOP); + Type *Ty = CI->getType(); + Value *Ld = nullptr; + if (Ty->isPointerTy()) { + DXASSERT(!DxilResource::IsAnyTexture(RK), + "Textures should not be treated as structured buffers."); + TranslateStructBufSubscript(cast(ldHelper.retVal), handle, + ldHelper.status, hlslOP, RK, DL); + } else { + Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); + dxilutil::MigrateDebugValue(CI, Ld); + } + // CI is replaced by above translation calls.. return nullptr; } @@ -7887,69 +7877,21 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset, Builder.CreateCall(dxilF, Args); } -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned ElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy) { - - unsigned EltSize = DL.getTypeAllocSize(VecEltTy); - unsigned alignment = std::min(baseAlign, EltSize); - Constant *alignmentVal = OP->GetI32Const(alignment); - - if (baseOffset == nullptr) { - baseOffset = OP->GetU32Const(0); - } - - std::vector elts(ElemCount); - unsigned rest = (ElemCount % 4); - for (unsigned i = 0; i < ElemCount - rest; i += 4) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, 4, alignmentVal); - bufLds.emplace_back(bufLd); - elts[i] = ResultElts[0]; - elts[i + 1] = ResultElts[1]; - elts[i + 2] = ResultElts[2]; - elts[i + 3] = ResultElts[3]; - - baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize)); - } - - if (rest) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, rest, alignmentVal); - bufLds.emplace_back(bufLd); - for (unsigned i = 0; i < rest; i++) - elts[ElemCount - rest + i] = ResultElts[i]; - } - - // If the expected return type is scalar then skip building a vector - if (isScalarTy) { - return elts[0]; - } - - Value *Vec = HLMatrixLower::BuildVector(VecEltTy, elts, Builder); - return Vec; -} - -Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder, - Value *handle, hlsl::OP *OP, Value *status, - Value *bufIdx, Value *baseOffset, +Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, + Value *handle, HLResource::Kind RK, hlsl::OP 
*OP, + Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { + + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); +#ifndef NDEBUG + Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); + Type *matType = ptr->getType()->getPointerElementType(); HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - unsigned matSize = MatTy.getNumElements(); - std::vector bufLds; - Value *Vec = - TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx, - baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8); - Vec = MatTy.emitLoweredMemToReg(Vec, Builder); - return Vec; + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == + helper.retVal->getType(), + "helper type should match vectorized matrix"); +#endif + return TranslateBufLoad(helper, RK, Builder, OP, DL); } void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, @@ -7991,9 +7933,9 @@ void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, } } -void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, - Value *status, Value *bufIdx, Value *baseOffset, - const DataLayout &DL) { +void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, + hlsl::OP *OP, Value *status, Value *bufIdx, + Value *baseOffset, const DataLayout &DL) { IRBuilder<> Builder(CI); HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); unsigned opcode = GetHLOpcode(CI); @@ -8006,13 +7948,10 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, // orientation. switch (matOp) { case HLMatLoadStoreOpcode::RowMatLoad: - case HLMatLoadStoreOpcode::ColMatLoad: { - Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); - Value *NewLd = TranslateStructBufMatLd( - ptr->getType()->getPointerElementType(), Builder, handle, OP, status, - bufIdx, baseOffset, DL); - CI->replaceAllUsesWith(NewLd); - } break; + case HLMatLoadStoreOpcode::ColMatLoad: + TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx, + baseOffset, DL); + break; case HLMatLoadStoreOpcode::RowMatStore: case HLMatLoadStoreOpcode::ColMatStore: { Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); @@ -8283,57 +8222,47 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } userCall->eraseFromParent(); } else if (group == HLOpcodeGroup::HLMatLoadStore) - TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx, + // Load/Store matrix within a struct + TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx, baseOffset, DL); else if (group == HLOpcodeGroup::HLSubscript) { + // Subscript of matrix within a struct TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx, baseOffset, status, OP, DL); } - } else if (isa(user) || isa(user)) { - LoadInst *LdInst = dyn_cast(user); - StoreInst *StInst = dyn_cast(user); - - Type *Ty = isa(user) ? LdInst->getType() - : StInst->getValueOperand()->getType(); + } else if (LoadInst *LdInst = dyn_cast(user)) { + // Load of scalar/vector within a struct or structured raw load. + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + TranslateBufLoad(helper, ResKind, Builder, OP, DL); + + LdInst->eraseFromParent(); + } else if (StoreInst *StInst = dyn_cast(user)) { + // Store of scalar/vector within a struct or structured raw store. 
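// The component mask computed below follows a simple rule, restated here as
// an illustrative standalone helper (plain C++, not DXC code): one bit per
// written component, with scalars writing only X.
#include <cstdint>

inline uint8_t StoreComponentMask(unsigned numComponents /* 1-4 */) {
  uint8_t mask = 0;
  for (unsigned i = 0; i < numComponents; ++i)
    mask |= static_cast<uint8_t>(1u << i); // X=1, Y=2, Z=4, W=8
  return mask; // scalar -> 0x1 (kCompMask_X), four components -> 0xF
}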
+ Type *Ty = StInst->getValueOperand()->getType(); Type *pOverloadTy = Ty->getScalarType(); - Value *Offset = baseOffset; + Value *offset = baseOffset; - if (LdInst) { - unsigned NumComponents = 0; - if (VectorType *VTy = dyn_cast(Ty)) - NumComponents = VTy->getNumElements(); - else - NumComponents = 1; - Value *ResultElts[4]; - Constant *Alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateRawBufLd(handle, bufIdx, Offset, status, pOverloadTy, ResultElts, - OP, Builder, NumComponents, Alignment); - Value *NewLd = ScalarizeElements(Ty, ResultElts, Builder); - LdInst->replaceAllUsesWith(NewLd); - } else { - Value *val = StInst->getValueOperand(); - Value *undefVal = llvm::UndefValue::get(pOverloadTy); - Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; - uint8_t mask = 0; - if (Ty->isVectorTy()) { - unsigned vectorNumElements = Ty->getVectorNumElements(); - DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); - assert(vectorNumElements <= 4); - for (unsigned i = 0; i < vectorNumElements; i++) { - vals[i] = Builder.CreateExtractElement(val, i); - mask |= (1 << i); - } - } else { - vals[0] = val; - mask = DXIL::kCompMask_X; + Value *val = StInst->getValueOperand(); + Value *undefVal = llvm::UndefValue::get(pOverloadTy); + Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; + uint8_t mask = 0; + if (Ty->isVectorTy()) { + unsigned vectorNumElements = Ty->getVectorNumElements(); + DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); + assert(vectorNumElements <= 4); + for (unsigned i = 0; i < vectorNumElements; i++) { + vals[i] = Builder.CreateExtractElement(val, i); + mask |= (1 << i); } - Constant *alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateStructBufSt(handle, bufIdx, Offset, pOverloadTy, OP, Builder, - vals, mask, alignment); + } else { + vals[0] = val; + mask = DXIL::kCompMask_X; } - user->eraseFromParent(); + Constant *alignment = + OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); + GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder, vals, + mask, alignment); + StInst->eraseFromParent(); } else if (BitCastInst *BCI = dyn_cast(user)) { // Recurse users for (auto U = BCI->user_begin(); U != BCI->user_end();) { @@ -8368,13 +8297,18 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()), "else bitness is wrong"); - offset = Builder.CreateAdd(offset, baseOffset); + // No offset into element for Raw buffers; byte offset is in bufIdx. + if (DXIL::IsRawBuffer(ResKind)) + bufIdx = Builder.CreateAdd(offset, bufIdx); + else + baseOffset = Builder.CreateAdd(offset, baseOffset); for (auto U = GEP->user_begin(); U != GEP->user_end();) { Value *GEPUser = *(U++); TranslateStructBufSubscriptUser(cast(GEPUser), handle, - ResKind, bufIdx, offset, status, OP, DL); + ResKind, bufIdx, baseOffset, status, OP, + DL); } // delete the inst GEP->eraseFromParent(); @@ -8388,13 +8322,12 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); Value *bufIdx = nullptr; Value *offset = nullptr; - if (ResKind == HLResource::Kind::RawBuffer) { - offset = subscriptIndex; - } else { + bufIdx = subscriptIndex; + if (ResKind == HLResource::Kind::RawBuffer) + offset = UndefValue::get(Type::getInt32Ty(CI->getContext())); + else // StructuredBuffer, TypedBuffer, etc. 
- bufIdx = subscriptIndex; offset = OP->GetU32Const(0); - } for (auto U = CI->user_begin(); U != CI->user_end();) { Value *user = *(U++); @@ -8408,19 +8341,14 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, // HLSubscript. namespace { -Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK, - DXIL::ResourceClass RC, Value *handle, - LoadInst *ldInst, IRBuilder<> &Builder, - hlsl::OP *hlslOP, const DataLayout &DL) { - ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, - /*bForSubscript*/ true); - // Default sampleIdx for 2DMS textures. - if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) - ldHelper.mipLevel = hlslOP->GetU32Const(0); - // use ldInst as retVal - ldHelper.retVal = ldInst; - TranslateLoad(ldHelper, RK, Builder, hlslOP, DL); +Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK, + DXIL::ResourceClass RC, Value *handle, + LoadInst *ldInst, IRBuilder<> &Builder, + hlsl::OP *hlslOP, const DataLayout &DL) { + // The arguments to the call instruction are used to determine the access, + // the return value and type come from the load instruction. + ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); // delete the ld ldInst->eraseFromParent(); return ldHelper.retVal; @@ -8463,9 +8391,9 @@ Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx, return VecVal; } -void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); hlsl::OP *hlslOP = &helper.hlslOP; @@ -8481,8 +8409,8 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, Instruction *I = cast(user); IRBuilder<> Builder(I); if (LoadInst *ldInst = dyn_cast(user)) { - TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, - helper.dataLayout); + TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, + helper.dataLayout); } else if (StoreInst *stInst = dyn_cast(user)) { Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, @@ -8504,7 +8432,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate Ld. LoadInst *tmpLd = StBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout); // Update vector. 
ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx, @@ -8524,7 +8452,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate tmp vector load with vector type & translate it LoadInst *tmpLd = LdBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout); // get the single element @@ -8697,8 +8625,9 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, DXASSERT(CI->hasOneUse(), "subscript should only have one use"); IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { - ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel); - TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); + Value *Offset = UndefValue::get(Builder.getInt32Ty()); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); ldInst->eraseFromParent(); } else { StoreInst *stInst = cast(*U); @@ -8736,7 +8665,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, helper.dataLayout); else - TranslateDefaultSubscript(CI, helper, pObjHelper, Translated); + TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated); return; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index e6246845b3..9f7a487a05 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -3,14 +3,34 @@ // RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s + +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s 
--check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT + // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s @@ -26,8 +46,6 @@ // for different aggregate buffer types and indices. /////////////////////////////////////////////////////////////////////// - - // CHECK: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]], #if !defined(ATY) @@ -68,6 +86,16 @@ struct OffVector { } }; +template +struct Matrix { + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + ByteAddressBuffer RoByBuf : register(t1); RWByteAddressBuffer RwByBuf : register(u1); @@ -156,6 +184,8 @@ void main(uint ix[2] : IX) { // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -163,6 +193,8 @@ void main(uint ix[2] : IX) { TYPE stbElt1 SS = RwStBuf.Load(ix[0]); // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl new file mode 100644 index 0000000000..03735cb968 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl @@ -0,0 +1,162 @@ +// RUN: %dxc -DTYPE=float -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// Confirm that 6.9 doesn't use vector loads for scalars and vec1s +// RUN: %dxc -DTYPE=float -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_9 %s | FileCheck %s 
--check-prefixes=CHECK,I1
+// RUN: %dxc -DTYPE=uint64_t -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64
+// RUN: %dxc -DTYPE=double -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64
+
+// RUN: %dxc -DTYPE=float1 -T vs_6_9 %s | FileCheck %s
+// RUN: %dxc -DTYPE=bool1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1
+// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64
+// RUN: %dxc -DTYPE=double1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64
+
+///////////////////////////////////////////////////////////////////////
+// Test codegen for various load and store operations and conversions
+// for different scalar buffer types and confirm that the proper
+// loads, stores, and conversion operations take place.
+///////////////////////////////////////////////////////////////////////
+
+
+// These -DAGs must match the same line. That is the only reason for the -DAG.
+// The first match will assign [[TY]] to the native type.
+// For most runs, the second match will assign [[TY32]] to the same thing.
+// For 64-bit types, the memory representation is i32 and a separate variable is needed.
+// For these cases, there is another line that will always match i32.
+// This line will also force the previous -DAGs to match the same line, since the most
+// this shader can produce is two ResRet types.
+// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]],
+// CHECK-DAG: %dx.types.ResRet.[[TY32:[a-z][0-9][0-9]]] = type { [[TYPE]],
+// I64: %dx.types.ResRet.[[TY32:i32]]
+// F64: %dx.types.ResRet.[[TY32:i32]]
+
+  ByteAddressBuffer RoByBuf : register(t1);
+RWByteAddressBuffer RwByBuf : register(u1);
+
+  StructuredBuffer< TYPE > RoStBuf : register(t2);
+RWStructuredBuffer< TYPE > RwStBuf : register(u2);
+
+  Buffer< TYPE > RoTyBuf : register(t3);
+RWBuffer< TYPE > RwTyBuf : register(u3);
+
+ConsumeStructuredBuffer< TYPE > CnStBuf : register(u4);
+AppendStructuredBuffer< TYPE > ApStBuf : register(u5);
+
+void main(uint ix[2] : IX) {
+  // ByteAddressBuffer Tests
+
+  // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false)
+  // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false)
+
+  // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false)
+  // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false)
+
+  // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false)
+  // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false)
+
+  // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false)
+  // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false)
+
+  // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4,
+
+  // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]]
+  // CHECK: call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt4 = RoStBuf[ix[1]]; + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); + + // TypedBuffer Tests + // CHECK: [[ANHDLRWTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt1 = RwTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw 
i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt2 = RwTyBuf[ix[1]]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt3 = RoTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt4 = RoTyBuf[ix[1]]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // I64: lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index ea44fef604..8dcf5ead1c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -27,13 +27,20 @@ RWByteAddressBuffer RwByBuf : register(u1); StructuredBuffer< TYPE > RoStBuf : register(t2); RWStructuredBuffer< TYPE > RwStBuf : register(u2); - Buffer< TYPE > RoTyBuf : register(t3); -RWBuffer< TYPE > RwTyBuf : register(u3); +ConsumeStructuredBuffer CnStBuf : register(u3); +AppendStructuredBuffer ApStBuf : register(u4); -ConsumeStructuredBuffer CnStBuf : register(u4); -AppendStructuredBuffer ApStBuf : register(u5); + Buffer< TYPE > RoTyBuf : register(t5); +RWBuffer< TYPE > RwTyBuf : register(u5); -void main(uint ix[2] : IX) { + Texture1D< TYPE > RoTex1d : register(t6); +RWTexture1D< TYPE > RwTex1d : register(u6); + Texture2D< TYPE > RoTex2d : register(t7); +RWTexture2D< TYPE > RwTex2d : register(u7); + Texture3D< TYPE > RoTex3d : register(t8); +RWTexture3D< TYPE > RwTex3d : register(u8); + +void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -42,13 +49,27 @@ void main(uint ix[2] : IX) { // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) - // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) - // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // 
CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 0 }, i32 5, i1 false) + // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) - // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) - // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + // CHECK-DAG: [[HDLROTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 0 }, i32 6, i1 false) + // CHECK-DAG: [[HDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 1 }, i32 6, i1 false) + // CHECK-DAG: [[HDLROTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 0 }, i32 7, i1 false) + // CHECK-DAG: [[HDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 1 }, i32 7, i1 false) + // CHECK-DAG: [[HDLROTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 0 }, i32 8, i1 false) + // CHECK-DAG: [[HDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 1 }, i32 8, i1 false) - // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK-DAG: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0 + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0 + // CHECK-DAG: [[IX20:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0 + // CHECK-DAG: [[IX21:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1 + // CHECK-DAG: [[IX30:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 0 + // CHECK-DAG: [[IX31:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 1 + // CHECK-DAG: [[IX32:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 2 // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] @@ -56,7 +77,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + TYPE babElt1 = RwByBuf.Load< TYPE >(ix0); // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] @@ -64,14 +85,14 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, 
%dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -80,14 +101,13 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + TYPE stbElt1 = RwStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt2 = RwStBuf[ix[1]]; + TYPE stbElt2 = RwStBuf[ix1]; // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] @@ -95,20 +115,20 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt3 = RoStBuf.Load(ix[0]); + TYPE stbElt3 = RoStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt4 = RoStBuf[ix[1]]; + TYPE stbElt4 = RoStBuf[ix1]; // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -146,7 +166,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt1 = RwTyBuf.Load(ix[0]); + TYPE typElt1 = RwTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -162,7 +182,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt2 = RwTyBuf[ix[1]]; + TYPE typElt2 = RwTyBuf[ix1]; // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -179,7 +199,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt3 = RoTyBuf.Load(ix[0]); + TYPE typElt3 = RoTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -195,7 +215,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 
%{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt4 = RoTyBuf[ix[1]]; + TYPE typElt4 = RoTyBuf[ix1]; // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 @@ -210,5 +230,126 @@ void main(uint ix[2] : IX) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + + // Texture Tests + // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX1]], i32 0, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt2 = RwTex1d[ix0]; + + // CHECK: [[ANHDLROTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX2]], i32 0, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + 
TYPE texElt4 = RwTex2d[ix2]; + + // CHECK: [[ANHDLROTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX3]], i32 0, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt6 = RwTex3d[ix3]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: call void @dx.op.textureStore.[[TY32]](i32 67, %dx.types.Handle [[ANHDLRWTX3]], i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + RwTex3d[ix3] = texElt1 + texElt2 + texElt3 + texElt4 + texElt5 + texElt6; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl new file mode 100644 index 0000000000..7cd54e0387 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl @@ -0,0 +1,152 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer load lowering +// Much of this mirrors buffer-load-store and buffer-agg-load-store + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); + StructuredBuffer< float[2] > ArrBuf : register(t3); + StructuredBuffer< Vector > SVecBuf : register(t4); + StructuredBuffer< float2x2 > MatBuf : register(t5); + StructuredBuffer< Matrix > SMatBuf : register(t6); + +void main(uint ix0 : IX0) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, 
%struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Bab0 = BabBuf.Load< bool2 >(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab1 = (float2)BabBuf.Load< float[2] >(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab2 = BabBuf.Load< Vector >(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab3 = BabBuf.Load< float2x2 >(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab4 = BabBuf.Load< Matrix >(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle [[ANHDL]], i32 [[IX]], <2 x float> + BabBuf.Store< float2 >(ix0+5, select(Bab0, Bab1+Bab2, Bab3+Bab4)); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld0 = VecBuf.Load(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld1 = (float2)ArrBuf.Load(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld2 = SVecBuf.Load(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld3 = MatBuf.Load(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sld4 = SMatBuf.Load(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+5] = select(Sld0, Sld1+Sld2, Sld3+Sld4); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]] + float2 Sss0 = VecBuf[ix0 + 6]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 7 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss1 = (float2)ArrBuf[ix0 + 7]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 8 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss2 = SVecBuf[ix0 + 8].v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 9 + // CHECK: [[HDL:%.*]] = call 
%dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[SS]], i32 1, i32 3) + float2 Sss3 = MatBuf[ix0 + 9][1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 10 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sss4 = SMatBuf[ix0 + 10].m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 11 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+11] = select(Sss0, Sss1+Sss2, Sss3+Sss4); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll new file mode 100644 index 0000000000..6b01120f7b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll @@ -0,0 +1,404 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.StructuredBuffer" = type { [2 x float] } +%"class.StructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.StructuredBuffer >" = type { %class.matrix.float.2.2 } +%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.StructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%dx.types.Handle = type 
{ i8* }
+%dx.types.ResourceProperties = type { i32, i32 }
+
+@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4
+@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4
+@"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" = external global %"class.StructuredBuffer", align 4
+@"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.StructuredBuffer >", align 8
+@"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4
+@"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4
+
+; Function Attrs: nounwind
+define void @main(i32 %ix0) #0 {
+  %1 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A"
+
+  ; Booleans require some conversion after being loaded
+  ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer
+  ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 })
+  ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 3, i32 4)
+  ; CHECK: [[EL0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0
+  ; CHECK: [[EL1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1
+  ; CHECK: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[EL0]], i64 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[EL1]], i64 1
+  ; CHECK: {{%.*}} = icmp ne <2 x i32> [[VEC1]], zeroinitializer
+  %2 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %1)
+  %3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer)
+  %4 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %3, i32 %ix0)
+  %5 = zext <2 x i1> %4 to <2 x i32>
+  %6 = add i32 %ix0, 1
+  %7 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A"
+
+  ; Array loads do so one element at a time.
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %7) + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %10 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %9, i32 %6) + + %11 = getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 0 + %12 = load float, float* %11 + %13 = getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 1 + %14 = load float, float* %13 + %15 = insertelement <2 x float> undef, float %12, i32 0 + %16 = insertelement <2 x float> %15, float %14, i32 1 + %17 = add i32 %ix0, 3 + %18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Vector inside a struct is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %18) + %20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %21 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %20, i32 %17) + %22 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %21, i32 0, i32 2 + %23 = load <2 x float>, <2 x float>* %22, align 4 + %24 = add i32 %ix0, 4 + %25 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %26 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %25) + %27 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %26, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %28 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %27, i32 %24) + %row2col = shufflevector <4 x float> %28, <4 x float> %28, <4 x i32> + %29 = shufflevector <4 x float> %row2col, <4 x float> %row2col, <2 x i32> + %30 = add i32 %ix0, 5 + %31 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %32 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %31) + %33 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %32, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %34 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %33, i32 %30) + %35 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %34, i32 0, i32 1 + %36 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %35, i32 1, i32 3) + %37 = load <2 x float>, <2 x float>* %36 + %38 = fadd <2 x float> %29, %37 + %39 = fadd <2 x float> %16, %23 + %40 = icmp ne <2 x i32> %5, zeroinitializer + %41 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %40, <2 x float> %39, <2 x float> %38) + %42 = load %struct.RWByteAddressBuffer, 
%struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + %43 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %42) + %44 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %43, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %44, i32 %ix0, <2 x float> %41) + %45 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %45) + %47 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %46, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %48 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %47, i32 %ix0) + %49 = add i32 %ix0, 1 + %50 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %51 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %50) + %52 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %51, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %53 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %52, i32 %49) + %54 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 0 + %55 = load float, float* %54 + %56 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 1 + %57 = load float, float* %56 + %58 = insertelement <2 x float> undef, float %55, i32 0 + %59 = insertelement <2 x float> %58, float %57, i32 1 + %60 = add i32 %ix0, 3 + %61 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %62 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %61) + %63 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %62, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %64 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %63, i32 %60) + %65 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %64, i32 0, i32 2 + %66 = load <2 x float>, <2 x float>* %65, align 4 + %67 = add i32 %ix0, 4 + %68 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %71 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %70, i32 %67) + %row2col1 = shufflevector <4 x float> %71, <4 x float> %71, <4 x i32> + %72 = shufflevector <4 x float> %row2col1, <4 x float> %row2col1, <2 x i32> + %73 = add i32 %ix0, 5 + %74 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %75 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %74) + %76 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %75, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %77 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %76, i32 %73) + %78 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %77, i32 0, i32 1 + %79 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %78, i32 1, i32 3) + %80 = load <2 x float>, <2 x float>* %79 + %81 = fadd <2 x float> %72, %80 + %82 = fadd <2 x float> %59, %66 + %83 = fcmp une <2 x float> %48, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call 
%dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %84 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %83, <2 x float> %82, <2 x float> %81) + %85 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %86 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %85) + %87 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %86, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %88 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %87, i32 %ix0) + store <2 x float> %84, <2 x float>* %88 + %89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %89) + %91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %90, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %92 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %91, i32 %ix0) + %93 = load <2 x float>, <2 x float>* %92 + %94 = add i32 %ix0, 1 + %95 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %96 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %95) + %97 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %96, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %98 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %97, i32 %94) + %99 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 0 + %100 = load float, float* %99 + %101 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 1 + %102 = load float, float* %101 + %103 = insertelement <2 x float> undef, float %100, i32 0 + %104 = insertelement <2 x float> %103, float %102, i32 1 + %105 = add i32 %ix0, 3 + %106 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct is a simple load. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %107 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %106) + %108 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %107, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %109 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %108, i32 %105) + %110 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %109, i32 0, i32 2 + %111 = load <2 x float>, <2 x float>* %110, align 4 + %112 = add i32 %ix0, 4 + %113 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; Subscripted matrices get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 12, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %113) + %115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %114, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %116 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %115, i32 %112) + %117 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %116, i32 1, i32 3) + %118 = load <2 x float>, <2 x float>* %117 + %119 = add i32 %ix0, 5 + %120 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %120) + %122 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %121, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %122, i32 %119) + %124 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %123, i32 0, i32 1 + %125 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %124, i32 1, i32 3) + %126 = load <2 x float>, <2 x float>* %125 + %127 = fadd <2 x float> %118, %126 + %128 = fadd <2 x float> %104, %111 + %129 = fcmp une <2 x float> %93, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %130 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %129, <2 x float> %128, <2 x float> %127) + %131 = add i32 %ix0, 1 + %132 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %133 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %132) + %134 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %133, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %135 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %134, i32 %131) + store <2 x float> %130, <2 x float>* %135 + ret void +} + +declare <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32, %class.matrix.float.2.2*, i32, i32) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* 
(i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #1 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} 
+!dx.entryPoints = !{!50} +!dx.fnprops = !{!63} +!dx.options = !{!64, !65} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.StructuredBuffer" undef, !12, %"class.StructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.StructuredBuffer >" undef, !29, %"class.StructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 = !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} +!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{!52, !60, null, null} +!52 = !{!53, !55, !57, !59} +!53 = !{i32 0, %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i32 0, !54} +!54 = !{i32 1, i32 8} +!55 = !{i32 1, %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i32 0, !56} +!56 = !{i32 1, i32 32} +!57 = !{i32 2, %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i32 0, !58} +!58 = !{i32 1, i32 16} +!59 = !{i32 3, %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i32 0, !56} +!60 = !{!61, !62} +!61 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!62 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !54} +!63 = !{void (i32)* @main, i32 1} +!64 = !{i32 64} +!65 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl new file mode 100644 index 0000000000..47355d633f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer/texture load lowering + +RWBuffer< bool2 > TyBuf : 
register(u1); +Texture2DMS< bool2 > Tex2dMs : register(t2); + +Texture1D< float2 > Tex1d : register(t3); +Texture2D< float2 > Tex2d : register(t4); +Texture3D< float2 > Tex3d : register(t5); +Texture2DArray< float2 > Tex2dArr : register(t6); + +RWBuffer< float2 > OutBuf : register(u7); + +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3, uint4 ix4 : IX4) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb0 = TyBuf.Load(ix1 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb1 = TyBuf[ix1 + 2]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef), + // CHECK: call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]] + bool2 TxMs0 = Tex2dMs.Load(ix2 + 3, ix1); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + bool2 TxMs1 = Tex2dMs[ix2 + 4]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x 
float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx1d0 = Tex1d.Load(ix2 + 5); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Tx1d1 = Tex1d[ix1 + 6]; + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2d0 = Tex2d.Load(ix3 + 7); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx2d1 = Tex2d[ix2 + 8]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx3d0 = Tex3d.Load(ix4 + 9); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx3d1 = Tex3d[ix3 + 10]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call 
%dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx2da0 = Tex2dArr.Load(ix4 + 11); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2da1 = Tex2dArr[ix3 + 12]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 13 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+13] = select(Tyb0, Tx1d0, Tx1d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 14 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+14] = select(Tyb1, Tx2d0, Tx2d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 15 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+15] = select(TxMs0, Tx3d0, Tx3d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 16 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+16] = select(TxMs1, Tx2da0, Tx2da1); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll new file mode 100644 index 0000000000..3ecb28644c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll @@ -0,0 +1,346 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <2 x i32> } +%"class.Texture2DMS, 0>" = type { <2 x i32>, %"class.Texture2DMS, 0>::sample_type" } +%"class.Texture2DMS, 0>::sample_type" = type { i32 } +%"class.Texture1D >" = type { <2 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.Texture2D >" = type { <2 x float>, %"class.Texture2D >::mips_type" } +%"class.Texture2D >::mips_type" = type { i32 } +%"class.Texture3D >" = type { <2 x float>, %"class.Texture3D >::mips_type" } +%"class.Texture3D >::mips_type" = type { i32 } +%"class.Texture2DArray >" = type { <2 x float>, %"class.Texture2DArray >::mips_type" } +%"class.Texture2DArray >::mips_type" = type { i32 } +%"class.RWBuffer >" = type { <2 x float> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.Texture2DMS, 0>", align 4 +@"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" = external global %"class.Texture1D >", align 4 +@"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" = external global %"class.Texture2D >", align 4 +@"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" = external global %"class.Texture3D >", align 4 +@"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" = external global %"class.Texture2DArray >", align 4 +@"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" = external global %"class.RWBuffer >", align 4 + +; Function Attrs: nounwind +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3, <4 x i32> %ix4) #0 { + ; CHECK: [[PIX:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 1 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %1 = add i32 %ix1, 1 + %2 = load %"class.RWBuffer >", %"class.RWBuffer >"* 
@"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %2) + %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %5 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %4, i32 %1) + + %6 = zext <2 x i1> %5 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 2 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %7 = add i32 %ix1, 2 + %8 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %9 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %8) + %10 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %9, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %11 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %10, i32 %7) + %12 = load <2 x i32>, <2 x i32>* %11 + + %13 = icmp ne <2 x i32> %12, zeroinitializer + %14 = zext <2 x i1> %13 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[PIX]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %15 = add <2 x i32> %ix2, + %16 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %16) + %18 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %17, %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %19 = call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle %18, <2 x i32> %15, i32 %ix1) + %20 = zext <2 x i1> %19 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %21 = add <2 x i32> %ix2, + %22 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %23 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %22) + %24 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %23, %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %25 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %24, <2 x i32> %21) + %26 = load <2 x i32>, <2 x i32>* %25 + + %27 = icmp ne <2 x i32> %26, zeroinitializer + %28 = zext <2 x i1> %27 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX1]], i32 [[IX0]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %29 = add <2 x i32> %ix2, + %30 = load %"class.Texture1D >", %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %30) + %32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %31, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %33 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, 
%dx.types.Handle %32, <2 x i32> %29) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 6 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %34 = add i32 %ix1, 6 + %35 = load %"class.Texture1D >", %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %35) + %37 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %36, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %38 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %37, i32 %34) + %39 = load <2 x float>, <2 x float>* %38 + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX2]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %40 = add <3 x i32> %ix3, + %41 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %41) + %43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %42, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %44 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, %dx.types.Handle %43, <3 x i32> %40) + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %45 = add <2 x i32> %ix2, + %46 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %46) + %48 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %47, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %49 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %48, <2 x i32> %45) + %50 = load <2 x float>, <2 x float>* %49 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %51 = add <4 x i32> %ix4, + %52 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %52) + %54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle %53, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %55 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %54, <4 x i32> %51) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %56 = add <3 x i32> %ix3, + %57 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %57) + %59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle %58, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %60 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %59, <3 x i32> %56) + %61 = load <2 x float>, <2 x float>* %60 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %62 = add <4 x i32> %ix4, + %63 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %63) + %65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %64, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %66 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %65, <4 x i32> %62) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %67 = add <3 x i32> %ix3, + %68 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %70, <3 x i32> %67) + %72 = load <2 x float>, <2 x float>* %71 + + %73 = icmp ne <2 x i32> %6, zeroinitializer + %74 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %73, <2 x float> %33, <2 x float> %39) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 13 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef, + %75 = add i32 %ix1, 13 + %76 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %77 = call %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %76) + %78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %77, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %79 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %78, i32 %75) + store <2 x float> %74, <2 x float>* %79 + + %80 = icmp ne <2 x i32> %14, zeroinitializer + %81 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %80, <2 x float> %44, <2 x float> %50) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 14 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %82 = add i32 %ix1, 14 + %83 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %84 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %83) + %85 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %84, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %86 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %85, i32 %82) + store <2 x float> %81, <2 x float>* %86 + + %87 = icmp ne <2 x i32> %20, zeroinitializer + %88 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %87, <2 x float> %55, <2 x float> %61) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 15 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %89 = add i32 %ix1, 15 + %90 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %91 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %90) + %92 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %91, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %93 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %92, i32 %89) + store <2 x float> %88, <2 x float>* %93 + + %94 = icmp ne <2 x i32> %28, zeroinitializer + %95 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %94, <2 x float> %66, <2 x float> %72) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 16 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, 
%"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %96 = add i32 %ix1, 16 + %97 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %98 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %97) + %99 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %98, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %100 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %99, i32 %96) + store <2 x float> %95, <2 x float>* %100 + + ret void +} + +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32, %"class.Texture2DMS, 0>") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2DMS, 0>") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32, %"class.Texture1D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture1D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32, %"class.Texture2D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32, %dx.types.Handle, <4 x i32>) #1 +declare %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32, %"class.Texture3D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture3D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32, %"class.Texture2DArray >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2DArray >") #2 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!22} +!dx.fnprops = !{!35} +!dx.options = !{!36, !37} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16, !19} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{i32 0, !20, !21} +!20 = !{i32 4, !"IX4", i32 7, i32 5} +!21 = !{i32 4} +!22 = !{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !"main", null, !23, null} +!23 = !{!24, !32, null, null} +!24 = !{!25, !27, !29, !30, !31} +!25 = !{i32 0, %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A", !"Tex2dMs", i32 0, i32 2, i32 1, i32 3, i32 0, !26} +!26 = !{i32 0, i32 5} +!27 = !{i32 1, %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A", !"Tex1d", i32 0, i32 3, i32 1, i32 1, i32 0, !28} +!28 = !{i32 0, i32 9} +!29 = !{i32 2, %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A", !"Tex2d", i32 0, i32 4, i32 1, i32 2, i32 0, !28} +!30 = !{i32 3, %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A", !"Tex3d", i32 0, i32 5, i32 1, i32 4, i32 0, !28} +!31 = !{i32 4, %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A", !"Tex2dArr", i32 0, i32 6, i32 1, i32 7, i32 0, !28} +!32 = !{!33, !34} +!33 = !{i32 0, %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"TyBuf", i32 0, i32 1, i32 1, i32 10, i1 false, i1 false, i1 false, !26} +!34 = !{i32 1, %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A", !"OutBuf", i32 0, i32 7, i32 1, i32 10, i1 false, i1 false, i1 false, !28} +!35 = 
!{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, i32 1} +!36 = !{i32 64} +!37 = !{i32 -1} From 9ba9689e22c6861279bef0463baf75adac4aec18 Mon Sep 17 00:00:00 2001 From: Junda Liu Date: Tue, 18 Mar 2025 22:42:34 +0800 Subject: [PATCH 30/88] [SPIR-V] Set RValue for the result of bitfield extract emulation (#7200) Otherwise, the result of bitfield extract emulation is treated as LValue and may have an extra OpLoad generated. --- tools/clang/lib/SPIRV/SpirvBuilder.cpp | 2 ++ ...p.struct.access.bitfield.sized.rvalue.hlsl | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index b1e7388f16..1275e2b252 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -994,6 +994,8 @@ SpirvInstruction *SpirvBuilder::createEmulatedBitFieldExtract( rightShift->setResultType(baseType); } + rightShift->setRValue(true); + return rightShift; } diff --git a/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl new file mode 100644 index 0000000000..414d8a638c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T cs_6_2 -E main -spirv -fcgl -enable-16bit-types %s | FileCheck %s + +struct S1 +{ + uint16_t a : 8; +}; + +S1 foo() +{ + return (S1)0; +} + +[numthreads(1, 1, 1)] +void main() { + uint16_t test = foo().a; +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_ushort %temp_var_S1 %int_0 +// CHECK: [[raw:%[0-9]+]] = OpLoad %ushort [[ptr]] +// CHECK: [[tmp:%[0-9]+]] = OpShiftLeftLogical %ushort [[raw]] %uint_8 +// CHECK: [[out:%[0-9]+]] = OpShiftRightLogical %ushort [[tmp]] %uint_8 +// CHECK-NOT: OpLoad %ushort [[out]] +// CHECK: OpStore %test [[out]] +} From 503ef3c2bc198b1e844da53c117402d45302cdd1 Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 18 Mar 2025 10:25:04 -0500 Subject: [PATCH 31/88] Switch from tj-actions/changed-files to step-security/changed-files (#7217) Aligning with upstream LLVM's action definition. --- .github/workflows/clang-format-checker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index 7e39a5b0be..1c69d6de86 100644 --- a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -19,10 +19,10 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v41 + uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1 with: separator: "," - fetch_depth: 100 # Fetches only the last 10 commits + skip_initial_fetch: true - name: "Listed files" env: From 3ddf29bd4384cd2b81a6b04c71ca9e8f3160714f Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Tue, 18 Mar 2025 10:13:17 -0700 Subject: [PATCH 32/88] Disallow swizzling on long vectors (#7215) This PR addresses https://github.com/microsoft/DirectXShaderCompiler/issues/7194 by adding a new error string in DiagnosticSemaKinds.td and emitting it in SemaHLSL.cpp. @pow2clk implemented most of this in his [fork](https://github.com/microsoft/DirectXShaderCompiler/commit/a41e0a69db6fd072ffe8f1c811bf3dadcc2ab8fe). I'm just helping to finish it. **How verified:** 1. 
Several new test cases were added and verified locally 2. Ran hcttest locally --- .../clang/Basic/DiagnosticSemaKinds.td | 2 ++ tools/clang/lib/Sema/SemaHLSL.cpp | 3 +++ .../hlsl/types/invalid-longvec-swizzle.hlsl | 27 +++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index b8a772b3a8..16ff7777a7 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7549,6 +7549,8 @@ def err_hlsl_vector_element_index_out_of_bounds: Error< "vector element index '%0' is out of bounds">; def err_hlsl_vector_member_too_many_positions: Error< "more than four positions are referenced in '%0'">; +def err_hlsl_vector_member_on_long_vector: Error< + "invalid swizzle '%0' on vector of over 4 elements.">; def err_hlsl_missing_type_specifier : Error< // Patterened after err_missing_type_specifier "HLSL requires a type specifier for all declarations">; def err_hlsl_multiple_concrete_bases : Error< diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 031e49408f..66cbea12ce 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -8643,6 +8643,9 @@ ExprResult HLSLExternalSource::LookupVectorMemberExprForHLSL( llvm_unreachable("Unknown VectorMemberAccessError value"); } + if (colCount > 4) + msg = diag::err_hlsl_vector_member_on_long_vector; + if (msg != 0) { m_sema->Diag(MemberLoc, msg) << memberText; diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl new file mode 100644 index 0000000000..28b4a52158 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=float +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=bool +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=uint64_t +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=double +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=float16_t +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=int16_t + +export +vector doit(vector vec5) { + vec5.x = 1; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return vec5.xyw; // expected-error {{invalid swizzle 'xyw' on vector of over 4 elements.}} +} + +export +TYPE arr_to_vec(TYPE arr[5]) { + + TYPE val = (vector(arr, 1)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + TYPE val2 = ((vector)arr).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + return val; +} + +export TYPE lv_ctor(TYPE s) { + TYPE ret = (vector(1, 2, 3, 4, 5, s)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return ret; +} \ No newline at end of file From 6475f98147604c315b81302f324f779442a00cd2 Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 18 Mar 2025 15:23:45 -0500 Subject: [PATCH 33/88] Actually fix the changed-files workflow (#7226) This time I actually tested the workflow over on this PR: https://github.com/llvm-beanz/DirectXShaderCompiler/pull/6 --- .github/workflows/clang-format-checker.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index 1c69d6de86..d1887e4519 100644 --- 
a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -13,9 +13,16 @@ jobs: pull-requests: write steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - fetch-depth: 2 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Checkout through merge base + uses: rmacklin/fetch-through-merge-base@bfe4d03a86f9afa52bc1a70e9814fc92a07f7b75 # v0.3.0 + with: + base_ref: ${{ github.event.pull_request.base.ref }} + head_ref: ${{ github.event.pull_request.head.sha }} + deepen_length: 500 - name: Get changed files id: changed-files From 454bbf480805cae25173159465eb4769422dee5b Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 10:20:37 -0400 Subject: [PATCH 34/88] Fix typo in SPIR-V.rst (#7224) Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7176 --- docs/SPIR-V.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 072a2fe9c1..9a8150a0e8 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -312,7 +312,7 @@ Supported extensions * SPV_NV_mesh_shader * SPV_KHR_ray_query * SPV_EXT_shader_image_int64 -* SPV_KHR_fragment_shading_barycentric +* SPV_KHR_fragment_shader_barycentric * SPV_KHR_physical_storage_buffer * SPV_KHR_vulkan_memory_model * SPV_NV_compute_shader_derivatives From 6701eeddd5c759a277ee40329ea746f4984748b1 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 13:01:38 -0400 Subject: [PATCH 35/88] [SPIRV] Handle a cast to void (#7227) Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7134 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 6 ++++-- .../clang/test/CodeGenSPIRV/cast.to.void.hlsl | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 3a67257da7..3aaa91d50a 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -3657,14 +3657,16 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } + case CastKind::CK_ToVoid: + return nullptr; default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } diff --git a/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl new file mode 100644 index 0000000000..19a37d071c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc dxc -T cs_6_6 -E Main -spirv %s -fcgl | FileCheck %s + + +// Make sure no code is generated for the cast to void. + +// CHECK: %src_Main = OpFunction %void None +// CHECK-NEXT: OpLabel +// CHECK-NEXT: %x = OpVariable +// CHECK-NEXT: OpStore %x %false +// CHECK-NEXT: OpReturn +// CHECK-NEXT: OpFunctionEnd + +[numthreads(1, 1, 1)] +void Main() +{ + bool x = false; + (void)x; +} From 0958e064380f7a450974c09dd6ea6e77ce10a523 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 13:03:13 -0400 Subject: [PATCH 36/88] [SPIRV] Don't assume entry points are at the start of the worklist. 
(#7225) Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7161 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 22 ++++++++----------- .../lib.fn.export.with.entrypoint.hlsl | 19 ++++++++++++++++ 2 files changed, 28 insertions(+), 13 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 3aaa91d50a..d858e2caca 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -809,21 +809,17 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) { spvBuilder.setMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450); - // Even though the 'workQueue' grows due to the above loop, the first - // 'numEntryPoints' entries in the 'workQueue' are the ones with the HLSL - // 'shader' attribute, and must therefore be entry functions. - assert(numEntryPoints <= workQueue.size()); - - for (uint32_t i = 0; i < numEntryPoints; ++i) { + for (uint32_t i = 0; i < workQueue.size(); ++i) { // TODO: assign specific StageVars w.r.t. to entry point const FunctionInfo *entryInfo = workQueue[i]; - assert(entryInfo->isEntryFunction); - spvBuilder.addEntryPoint( - getSpirvShaderStage( - entryInfo->shaderModelKind, - featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), - entryInfo->entryFunction, getEntryPointName(entryInfo), - getInterfacesForEntryPoint(entryInfo->entryFunction)); + if (entryInfo->isEntryFunction) { + spvBuilder.addEntryPoint( + getSpirvShaderStage( + entryInfo->shaderModelKind, + featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), + entryInfo->entryFunction, getEntryPointName(entryInfo), + getInterfacesForEntryPoint(entryInfo->entryFunction)); + } } // Add Location decorations to stage input/output variables. diff --git a/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl new file mode 100644 index 0000000000..0ab965aded --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_6 -E main -fspv-target-env=universal1.5 -fcgl %s -spirv | FileCheck %s + +// CHECK: OpEntryPoint MissKHR %miss "miss" %payload +// CHECK: OpDecorate %func LinkageAttributes "func" Export + + +struct RayPayload +{ + uint a; +}; + +export void func() +{ +} + +[shader("miss")] +void miss(inout RayPayload payload) +{ +} From b2bcf21a62566fed959a9091abb6ace4751071f2 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 17:07:57 -0400 Subject: [PATCH 37/88] Revert "[SPIRV] Use copy-in/copy-out for non-declaration (#7127)" (#7223) This did not solve all of the cases for the issue it was fixing. A new fix was done in the inliner in spirv-opt. This change is no longer needed. This reverts commit 8967dacb03f1d95fc0292aa7a2e48b0acf50dcd9. 
--- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 8 +------ .../cs.groupshared.function-param.out.hlsl | 6 +---- .../CodeGenSPIRV/fn.fixfuncall-compute.hlsl | 10 ++++---- .../CodeGenSPIRV/fn.fixfuncall-linkage.hlsl | 8 +++---- .../fn.param.inout.storage-class.hlsl | 9 +++---- .../CodeGenSPIRV/fn.param.inout.vector.hlsl | 8 +------ .../CodeGenSPIRV/fn.param.isomorphism.hlsl | 24 +++++-------------- 7 files changed, 21 insertions(+), 52 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index d858e2caca..557768f59a 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -3100,12 +3100,6 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { argInfo && argInfo->getStorageClass() != spv::StorageClass::Function && isResourceType(paramType); - // HLSL requires that the parameters be copied in and out from temporaries. - // This looks for cases where the copy can be elided. To generate valid - // SPIR-V, the argument must be a memory declaration. - // - // - // If argInfo is nullptr and argInst is a rvalue, we do not have a proper // pointer to pass to the function. we need a temporary variable in that // case. @@ -3114,7 +3108,7 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { // create a temporary variable for it because the function definition // expects are point-to-pointer argument for resources, which will be // resolved by legalization. - if ((argInfo || (argInst && argInst->getopcode() == spv::Op::OpVariable)) && + if ((argInfo || (argInst && !argInst->isRValue())) && canActAsOutParmVar(param) && !isArgGlobalVarWithResourceType && paramTypeMatchesArgType(paramType, arg->getType())) { // Based on SPIR-V spec, function parameter must be always Function diff --git a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl index 3ec0ad447e..8d0195d672 100644 --- a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl +++ b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl @@ -28,14 +28,10 @@ groupshared S D; [numthreads(1,1,1)] void main() { // CHECK: %E = OpVariable %_ptr_Function_int Function -// CHECK-NEXT: [[TempVar:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function - int E; // CHECK: [[A:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int %A %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[A]] -// CHECK-NEXT: OpStore [[TempVar]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[TempVar]] %B %C %D %E +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[A]] %B %C %D %E foo(A[0], B, C, D, E); A[0] = A[0] | B | C | D.a | E; } diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl index 70bf50abc6..dba7cd00ce 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl @@ -7,19 +7,19 @@ float4 foo(inout float f0, inout int f1) return 0; } -// CHECK-DAG: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function -// CHECK-DAG: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function +// CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Uniform_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: 
[[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %v4float %foo [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] struct Stru { int x; diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl index 6acd104aa3..5977fc454a 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl @@ -6,19 +6,19 @@ RWStructuredBuffer< float4 > output : register(u1); // CHECK: OpDecorate %main LinkageAttributes "main" Export // CHECK: %main = OpFunction %int None -// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %void %func [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] [noinline] void func(inout float f0, inout int f1) { diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl index 4d75d27fa8..d0e771e834 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl @@ -11,13 +11,10 @@ void main(float input : INPUT) { // CHECK: %param_var_a = OpVariable %_ptr_Function_float Function // CHECK: [[val:%[0-9]+]] = OpLoad %float %input -// CHECK: OpStore %param_var_a [[val]] +// CHECK: OpStore %param_var_a [[val]] // CHECK: [[p0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float [[p0]] -// CHECK-NEXT: OpStore [[temp0:%[a-zA-Z0-9_]+]] [[ld]] // CHECK: [[p1:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_1 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float %32 -// CHECK-NEXT: OpStore [[temp1:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK: OpFunctionCall %void %foo %param_var_a [[temp0]] [[temp1]] + +// CHECK: OpFunctionCall %void %foo %param_var_a [[p0]] [[p1]] foo(input, Data[0], Data[1]); } diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl index 5641923aaa..bda2183057 100644 --- 
a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl @@ -18,9 +18,7 @@ float4 main() : C { float4 val; // CHECK: [[z_ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_float %val %int_2 -// CHECK: [[ld:%[0-9]+]] = OpLoad %float [[z_ptr]] -// CHECK: OpStore %param_var_w [[ld]] -// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z %param_var_w +// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z [[z_ptr]] // CHECK-NEXT: [[y:%[0-9]+]] = OpLoad %v3float %param_var_y // CHECK-NEXT: [[old:%[0-9]+]] = OpLoad %v4float %val // Write to val.zwx: @@ -39,10 +37,6 @@ float4 main() : C { // CHECK-NEXT: [[old_0:%[0-9]+]] = OpLoad %v4float %val // CHECK-NEXT: [[new_0:%[0-9]+]] = OpVectorShuffle %v4float [[old_0]] [[z]] 4 5 2 3 // CHECK-NEXT: OpStore %val [[new_0]] - // Write to val.z: -// CHECK-NEXT: [[new:%[0-9]+]] = OpLoad %float %param_var_w -// CHECK-NEXT: OpStore [[z_ptr]] [[new]] - bar(val, val.zwx, val.xy, val.z); return MyRWBuffer[0]; diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl index 3f890099f5..a4ad925f77 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl @@ -62,11 +62,7 @@ void main() { fn.incr(); // CHECK: [[rwsb_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_R %rwsb %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %R [[rwsb_0]] -// CHECK-NEXT: [[ex:%[0-9]+]] = OpCompositeExtract %int [[ld]] 0 -// CHECK-NEXT: [[v:%[0-9]+]] = OpCompositeConstruct %R_0 [[ex]] -// CHECK-NEXT: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[v]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[rwsb_0]] decr(rwsb[0]); // CHECK: OpFunctionCall %void %decr2 %gs @@ -91,29 +87,21 @@ void main() { fnarr[0].incr(); // CHECK: [[gsarr_0:%[0-9]+]] = OpAccessChain %_ptr_Workgroup_S %gsarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[gsarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[gsarr_0]] decr2(gsarr[0]); // CHECK: [[starr_0:%[0-9]+]] = OpAccessChain %_ptr_Private_S %starr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[starr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[starr_0]] decr2(starr[0]); // CHECK: [[fnarr_0:%[0-9]+]] = OpAccessChain %_ptr_Function_S %fnarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[fnarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[fnarr_0]] decr2(fnarr[0]); // CHECK: [[arr:%[0-9]+]] = OpAccessChain %_ptr_Function_int %arr %int_0 // CHECK-NEXT: [[arr_0:%[0-9]+]] = OpLoad %int [[arr]] // CHECK-NEXT: [[arr_1:%[0-9]+]] = OpIAdd %int [[arr_0]] %int_1 -// CHECK-NEXT: OpStore [[arr]] [[arr_1]] -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[arr]] -// CHECK-NEXT: OpStore [[TempVar:%[0-9a-zA-Z_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[TempVar]] +// CHECK-NEXT: OpStore [[arr]] [[arr_1]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[arr]] int_decr(++arr[0]); } From 
a0932fa0817dcd93f1c527f04cbaec0f282d56c6 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 19 Mar 2025 14:51:12 -0700 Subject: [PATCH 38/88] Add /bigobj compile option to MSVC build (#7228) When targeting arm64 Debug, this error is detected: `libclang\dxcrewriteunused.cpp(1,1): error C1128: number of sections exceeded object file format limit: compile with /bigobj` This PR adds a compile option for the folder that contains dxcrewriteunused.cpp, so that the limit on the number of sections is increased, and compilation may succeed. --- tools/clang/tools/libclang/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/clang/tools/libclang/CMakeLists.txt b/tools/clang/tools/libclang/CMakeLists.txt index 1ef0c8ecd9..ed49cbaf44 100644 --- a/tools/clang/tools/libclang/CMakeLists.txt +++ b/tools/clang/tools/libclang/CMakeLists.txt @@ -119,6 +119,7 @@ if(MSVC) # Each functions is exported as "dllexport" in include/clang-c. # KB835326 set(LLVM_EXPORTED_SYMBOL_FILE) + add_compile_options(/bigobj) endif() # HLSL Change Starts From eb0234398b7665c4084b71dd1f1f662794128e20 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 20 Mar 2025 09:12:34 -0700 Subject: [PATCH 39/88] NFC: Make hlsl::IntrinsicOp enum values stable (#7231) This change makes hlsl::IntrinsicOp enum values stable by: - adding hlsl_intrinsic_opcodes.json to capture assigned indices - adds this to the files generated by hctgen - generation assigns new indices after the last index - hlsl::IntrinsicOp enum values have explicit assignments - removes ENABLE_SPIRV_CODEGEN ifdefs around opcode definitions and lowering table entries to keep these stable whether or not the spirv build setting is enabled. Fixes #7230 --- CMakeLists.txt | 2 + include/dxc/HlslIntrinsicOp.h | 730 +++++++++++++------------- lib/HLSL/HLOperationLower.cpp | 8 +- utils/hct/CMakeLists.txt | 3 + utils/hct/hctdb.py | 35 +- utils/hct/hctdb_instrhelp.py | 39 +- utils/hct/hctgen.py | 10 + utils/hct/hlsl_intrinsic_opcodes.json | 363 +++++++++++++ 8 files changed, 801 insertions(+), 389 deletions(-) create mode 100644 utils/hct/CMakeLists.txt create mode 100644 utils/hct/hlsl_intrinsic_opcodes.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f7db99784..74244c1d58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -686,6 +686,8 @@ add_subdirectory(include/dxc) # really depend on anything else in the build it is safe. 
list(APPEND LLVM_COMMON_DEPENDS HCTGen) +add_subdirectory(utils/hct) + if(EXISTS "${LLVM_MAIN_SRC_DIR}/external") add_subdirectory(external) # SPIRV change endif() diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index fcc9bb11b1..41c72d1a51 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -5,378 +5,366 @@ #pragma once namespace hlsl { enum class IntrinsicOp { - IOP_AcceptHitAndEndSearch, - IOP_AddUint64, - IOP_AllMemoryBarrier, - IOP_AllMemoryBarrierWithGroupSync, - IOP_AllocateRayQuery, - IOP_Barrier, - IOP_CallShader, - IOP_CheckAccessFullyMapped, - IOP_CreateResourceFromHeap, - IOP_D3DCOLORtoUBYTE4, - IOP_DeviceMemoryBarrier, - IOP_DeviceMemoryBarrierWithGroupSync, - IOP_DispatchMesh, - IOP_DispatchRaysDimensions, - IOP_DispatchRaysIndex, - IOP_EvaluateAttributeAtSample, - IOP_EvaluateAttributeCentroid, - IOP_EvaluateAttributeSnapped, - IOP_GeometryIndex, - IOP_GetAttributeAtVertex, - IOP_GetRemainingRecursionLevels, - IOP_GetRenderTargetSampleCount, - IOP_GetRenderTargetSamplePosition, - IOP_GroupMemoryBarrier, - IOP_GroupMemoryBarrierWithGroupSync, - IOP_HitKind, - IOP_IgnoreHit, - IOP_InstanceID, - IOP_InstanceIndex, - IOP_InterlockedAdd, - IOP_InterlockedAnd, - IOP_InterlockedCompareExchange, - IOP_InterlockedCompareExchangeFloatBitwise, - IOP_InterlockedCompareStore, - IOP_InterlockedCompareStoreFloatBitwise, - IOP_InterlockedExchange, - IOP_InterlockedMax, - IOP_InterlockedMin, - IOP_InterlockedOr, - IOP_InterlockedXor, - IOP_IsHelperLane, - IOP_NonUniformResourceIndex, - IOP_ObjectRayDirection, - IOP_ObjectRayOrigin, - IOP_ObjectToWorld, - IOP_ObjectToWorld3x4, - IOP_ObjectToWorld4x3, - IOP_PrimitiveIndex, - IOP_Process2DQuadTessFactorsAvg, - IOP_Process2DQuadTessFactorsMax, - IOP_Process2DQuadTessFactorsMin, - IOP_ProcessIsolineTessFactors, - IOP_ProcessQuadTessFactorsAvg, - IOP_ProcessQuadTessFactorsMax, - IOP_ProcessQuadTessFactorsMin, - IOP_ProcessTriTessFactorsAvg, - IOP_ProcessTriTessFactorsMax, - IOP_ProcessTriTessFactorsMin, - IOP_QuadAll, - IOP_QuadAny, - IOP_QuadReadAcrossDiagonal, - IOP_QuadReadAcrossX, - IOP_QuadReadAcrossY, - IOP_QuadReadLaneAt, - IOP_RayFlags, - IOP_RayTCurrent, - IOP_RayTMin, - IOP_ReportHit, - IOP_SetMeshOutputCounts, - IOP_TraceRay, - IOP_WaveActiveAllEqual, - IOP_WaveActiveAllTrue, - IOP_WaveActiveAnyTrue, - IOP_WaveActiveBallot, - IOP_WaveActiveBitAnd, - IOP_WaveActiveBitOr, - IOP_WaveActiveBitXor, - IOP_WaveActiveCountBits, - IOP_WaveActiveMax, - IOP_WaveActiveMin, - IOP_WaveActiveProduct, - IOP_WaveActiveSum, - IOP_WaveGetLaneCount, - IOP_WaveGetLaneIndex, - IOP_WaveIsFirstLane, - IOP_WaveMatch, - IOP_WaveMultiPrefixBitAnd, - IOP_WaveMultiPrefixBitOr, - IOP_WaveMultiPrefixBitXor, - IOP_WaveMultiPrefixCountBits, - IOP_WaveMultiPrefixProduct, - IOP_WaveMultiPrefixSum, - IOP_WavePrefixCountBits, - IOP_WavePrefixProduct, - IOP_WavePrefixSum, - IOP_WaveReadLaneAt, - IOP_WaveReadLaneFirst, - IOP_WorldRayDirection, - IOP_WorldRayOrigin, - IOP_WorldToObject, - IOP_WorldToObject3x4, - IOP_WorldToObject4x3, - IOP_abort, - IOP_abs, - IOP_acos, - IOP_all, - IOP_and, - IOP_any, - IOP_asdouble, - IOP_asfloat, - IOP_asfloat16, - IOP_asin, - IOP_asint, - IOP_asint16, - IOP_asuint, - IOP_asuint16, - IOP_atan, - IOP_atan2, - IOP_ceil, - IOP_clamp, - IOP_clip, - IOP_cos, - IOP_cosh, - IOP_countbits, - IOP_cross, - IOP_ddx, - IOP_ddx_coarse, - IOP_ddx_fine, - IOP_ddy, - IOP_ddy_coarse, - IOP_ddy_fine, - IOP_degrees, - IOP_determinant, - IOP_distance, - IOP_dot, - IOP_dot2add, - IOP_dot4add_i8packed, - 
IOP_dot4add_u8packed, - IOP_dst, - IOP_exp, - IOP_exp2, - IOP_f16tof32, - IOP_f32tof16, - IOP_faceforward, - IOP_firstbithigh, - IOP_firstbitlow, - IOP_floor, - IOP_fma, - IOP_fmod, - IOP_frac, - IOP_frexp, - IOP_fwidth, - IOP_isfinite, - IOP_isinf, - IOP_isnan, - IOP_ldexp, - IOP_length, - IOP_lerp, - IOP_lit, - IOP_log, - IOP_log10, - IOP_log2, - IOP_mad, - IOP_max, - IOP_min, - IOP_modf, - IOP_msad4, - IOP_mul, - IOP_normalize, - IOP_or, - IOP_pack_clamp_s8, - IOP_pack_clamp_u8, - IOP_pack_s8, - IOP_pack_u8, - IOP_pow, - IOP_printf, - IOP_radians, - IOP_rcp, - IOP_reflect, - IOP_refract, - IOP_reversebits, - IOP_round, - IOP_rsqrt, - IOP_saturate, - IOP_select, - IOP_sign, - IOP_sin, - IOP_sincos, - IOP_sinh, - IOP_smoothstep, - IOP_source_mark, - IOP_sqrt, - IOP_step, - IOP_tan, - IOP_tanh, - IOP_tex1D, - IOP_tex1Dbias, - IOP_tex1Dgrad, - IOP_tex1Dlod, - IOP_tex1Dproj, - IOP_tex2D, - IOP_tex2Dbias, - IOP_tex2Dgrad, - IOP_tex2Dlod, - IOP_tex2Dproj, - IOP_tex3D, - IOP_tex3Dbias, - IOP_tex3Dgrad, - IOP_tex3Dlod, - IOP_tex3Dproj, - IOP_texCUBE, - IOP_texCUBEbias, - IOP_texCUBEgrad, - IOP_texCUBElod, - IOP_texCUBEproj, - IOP_transpose, - IOP_trunc, - IOP_unpack_s8s16, - IOP_unpack_s8s32, - IOP_unpack_u8u16, - IOP_unpack_u8u32, -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferLoad, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferStore, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkReadClock, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode_id, -#endif // ENABLE_SPIRV_CODEGEN - MOP_Append, - MOP_RestartStrip, - MOP_CalculateLevelOfDetail, - MOP_CalculateLevelOfDetailUnclamped, - MOP_GetDimensions, - MOP_Load, - MOP_Sample, - MOP_SampleBias, - MOP_SampleCmp, - MOP_SampleCmpBias, - MOP_SampleCmpGrad, - MOP_SampleCmpLevel, - MOP_SampleCmpLevelZero, - MOP_SampleGrad, - MOP_SampleLevel, - MOP_Gather, - MOP_GatherAlpha, - MOP_GatherBlue, - MOP_GatherCmp, - MOP_GatherCmpAlpha, - MOP_GatherCmpBlue, - MOP_GatherCmpGreen, - MOP_GatherCmpRed, - MOP_GatherGreen, - MOP_GatherRaw, - MOP_GatherRed, - MOP_GetSamplePosition, - MOP_Load2, - MOP_Load3, - MOP_Load4, - MOP_InterlockedAdd, - MOP_InterlockedAdd64, - MOP_InterlockedAnd, - MOP_InterlockedAnd64, - MOP_InterlockedCompareExchange, - MOP_InterlockedCompareExchange64, - MOP_InterlockedCompareExchangeFloatBitwise, - MOP_InterlockedCompareStore, - MOP_InterlockedCompareStore64, - MOP_InterlockedCompareStoreFloatBitwise, - MOP_InterlockedExchange, - MOP_InterlockedExchange64, - MOP_InterlockedExchangeFloat, - MOP_InterlockedMax, - MOP_InterlockedMax64, - MOP_InterlockedMin, - MOP_InterlockedMin64, - MOP_InterlockedOr, - MOP_InterlockedOr64, - MOP_InterlockedXor, - MOP_InterlockedXor64, - MOP_Store, - MOP_Store2, - MOP_Store3, - MOP_Store4, - MOP_DecrementCounter, - MOP_IncrementCounter, - MOP_Consume, - MOP_WriteSamplerFeedback, - MOP_WriteSamplerFeedbackBias, - MOP_WriteSamplerFeedbackGrad, - MOP_WriteSamplerFeedbackLevel, - MOP_Abort, - MOP_CandidateGeometryIndex, - MOP_CandidateInstanceContributionToHitGroupIndex, - MOP_CandidateInstanceID, - MOP_CandidateInstanceIndex, - MOP_CandidateObjectRayDirection, - MOP_CandidateObjectRayOrigin, - MOP_CandidateObjectToWorld3x4, - MOP_CandidateObjectToWorld4x3, - MOP_CandidatePrimitiveIndex, - MOP_CandidateProceduralPrimitiveNonOpaque, - MOP_CandidateTriangleBarycentrics, - MOP_CandidateTriangleFrontFace, - MOP_CandidateTriangleRayT, - 
MOP_CandidateType, - MOP_CandidateWorldToObject3x4, - MOP_CandidateWorldToObject4x3, - MOP_CommitNonOpaqueTriangleHit, - MOP_CommitProceduralPrimitiveHit, - MOP_CommittedGeometryIndex, - MOP_CommittedInstanceContributionToHitGroupIndex, - MOP_CommittedInstanceID, - MOP_CommittedInstanceIndex, - MOP_CommittedObjectRayDirection, - MOP_CommittedObjectRayOrigin, - MOP_CommittedObjectToWorld3x4, - MOP_CommittedObjectToWorld4x3, - MOP_CommittedPrimitiveIndex, - MOP_CommittedRayT, - MOP_CommittedStatus, - MOP_CommittedTriangleBarycentrics, - MOP_CommittedTriangleFrontFace, - MOP_CommittedWorldToObject3x4, - MOP_CommittedWorldToObject4x3, - MOP_Proceed, - MOP_RayFlags, - MOP_RayTMin, - MOP_TraceRayInline, - MOP_WorldRayDirection, - MOP_WorldRayOrigin, - MOP_Count, - MOP_FinishedCrossGroupSharing, - MOP_GetGroupNodeOutputRecords, - MOP_GetThreadNodeOutputRecords, - MOP_IsValid, - MOP_GroupIncrementOutputCount, - MOP_ThreadIncrementOutputCount, - MOP_OutputComplete, -#ifdef ENABLE_SPIRV_CODEGEN - MOP_SubpassLoad, -#endif // ENABLE_SPIRV_CODEGEN + IOP_AcceptHitAndEndSearch = 0, + IOP_AddUint64 = 1, + IOP_AllMemoryBarrier = 2, + IOP_AllMemoryBarrierWithGroupSync = 3, + IOP_AllocateRayQuery = 4, + IOP_Barrier = 5, + IOP_CallShader = 6, + IOP_CheckAccessFullyMapped = 7, + IOP_CreateResourceFromHeap = 8, + IOP_D3DCOLORtoUBYTE4 = 9, + IOP_DeviceMemoryBarrier = 10, + IOP_DeviceMemoryBarrierWithGroupSync = 11, + IOP_DispatchMesh = 12, + IOP_DispatchRaysDimensions = 13, + IOP_DispatchRaysIndex = 14, + IOP_EvaluateAttributeAtSample = 15, + IOP_EvaluateAttributeCentroid = 16, + IOP_EvaluateAttributeSnapped = 17, + IOP_GeometryIndex = 18, + IOP_GetAttributeAtVertex = 19, + IOP_GetRemainingRecursionLevels = 20, + IOP_GetRenderTargetSampleCount = 21, + IOP_GetRenderTargetSamplePosition = 22, + IOP_GroupMemoryBarrier = 23, + IOP_GroupMemoryBarrierWithGroupSync = 24, + IOP_HitKind = 25, + IOP_IgnoreHit = 26, + IOP_InstanceID = 27, + IOP_InstanceIndex = 28, + IOP_InterlockedAdd = 29, + IOP_InterlockedAnd = 30, + IOP_InterlockedCompareExchange = 31, + IOP_InterlockedCompareExchangeFloatBitwise = 32, + IOP_InterlockedCompareStore = 33, + IOP_InterlockedCompareStoreFloatBitwise = 34, + IOP_InterlockedExchange = 35, + IOP_InterlockedMax = 36, + IOP_InterlockedMin = 37, + IOP_InterlockedOr = 38, + IOP_InterlockedXor = 39, + IOP_IsHelperLane = 40, + IOP_NonUniformResourceIndex = 41, + IOP_ObjectRayDirection = 42, + IOP_ObjectRayOrigin = 43, + IOP_ObjectToWorld = 44, + IOP_ObjectToWorld3x4 = 45, + IOP_ObjectToWorld4x3 = 46, + IOP_PrimitiveIndex = 47, + IOP_Process2DQuadTessFactorsAvg = 48, + IOP_Process2DQuadTessFactorsMax = 49, + IOP_Process2DQuadTessFactorsMin = 50, + IOP_ProcessIsolineTessFactors = 51, + IOP_ProcessQuadTessFactorsAvg = 52, + IOP_ProcessQuadTessFactorsMax = 53, + IOP_ProcessQuadTessFactorsMin = 54, + IOP_ProcessTriTessFactorsAvg = 55, + IOP_ProcessTriTessFactorsMax = 56, + IOP_ProcessTriTessFactorsMin = 57, + IOP_QuadAll = 58, + IOP_QuadAny = 59, + IOP_QuadReadAcrossDiagonal = 60, + IOP_QuadReadAcrossX = 61, + IOP_QuadReadAcrossY = 62, + IOP_QuadReadLaneAt = 63, + IOP_RayFlags = 64, + IOP_RayTCurrent = 65, + IOP_RayTMin = 66, + IOP_ReportHit = 67, + IOP_SetMeshOutputCounts = 68, + IOP_TraceRay = 69, + IOP_WaveActiveAllEqual = 70, + IOP_WaveActiveAllTrue = 71, + IOP_WaveActiveAnyTrue = 72, + IOP_WaveActiveBallot = 73, + IOP_WaveActiveBitAnd = 74, + IOP_WaveActiveBitOr = 75, + IOP_WaveActiveBitXor = 76, + IOP_WaveActiveCountBits = 77, + IOP_WaveActiveMax = 78, + IOP_WaveActiveMin = 79, + 
IOP_WaveActiveProduct = 80, + IOP_WaveActiveSum = 81, + IOP_WaveGetLaneCount = 82, + IOP_WaveGetLaneIndex = 83, + IOP_WaveIsFirstLane = 84, + IOP_WaveMatch = 85, + IOP_WaveMultiPrefixBitAnd = 86, + IOP_WaveMultiPrefixBitOr = 87, + IOP_WaveMultiPrefixBitXor = 88, + IOP_WaveMultiPrefixCountBits = 89, + IOP_WaveMultiPrefixProduct = 90, + IOP_WaveMultiPrefixSum = 91, + IOP_WavePrefixCountBits = 92, + IOP_WavePrefixProduct = 93, + IOP_WavePrefixSum = 94, + IOP_WaveReadLaneAt = 95, + IOP_WaveReadLaneFirst = 96, + IOP_WorldRayDirection = 97, + IOP_WorldRayOrigin = 98, + IOP_WorldToObject = 99, + IOP_WorldToObject3x4 = 100, + IOP_WorldToObject4x3 = 101, + IOP_abort = 102, + IOP_abs = 103, + IOP_acos = 104, + IOP_all = 105, + IOP_and = 106, + IOP_any = 107, + IOP_asdouble = 108, + IOP_asfloat = 109, + IOP_asfloat16 = 110, + IOP_asin = 111, + IOP_asint = 112, + IOP_asint16 = 113, + IOP_asuint = 114, + IOP_asuint16 = 115, + IOP_atan = 116, + IOP_atan2 = 117, + IOP_ceil = 118, + IOP_clamp = 119, + IOP_clip = 120, + IOP_cos = 121, + IOP_cosh = 122, + IOP_countbits = 123, + IOP_cross = 124, + IOP_ddx = 125, + IOP_ddx_coarse = 126, + IOP_ddx_fine = 127, + IOP_ddy = 128, + IOP_ddy_coarse = 129, + IOP_ddy_fine = 130, + IOP_degrees = 131, + IOP_determinant = 132, + IOP_distance = 133, + IOP_dot = 134, + IOP_dot2add = 135, + IOP_dot4add_i8packed = 136, + IOP_dot4add_u8packed = 137, + IOP_dst = 138, + IOP_exp = 139, + IOP_exp2 = 140, + IOP_f16tof32 = 141, + IOP_f32tof16 = 142, + IOP_faceforward = 143, + IOP_firstbithigh = 144, + IOP_firstbitlow = 145, + IOP_floor = 146, + IOP_fma = 147, + IOP_fmod = 148, + IOP_frac = 149, + IOP_frexp = 150, + IOP_fwidth = 151, + IOP_isfinite = 152, + IOP_isinf = 153, + IOP_isnan = 154, + IOP_ldexp = 155, + IOP_length = 156, + IOP_lerp = 157, + IOP_lit = 158, + IOP_log = 159, + IOP_log10 = 160, + IOP_log2 = 161, + IOP_mad = 162, + IOP_max = 163, + IOP_min = 164, + IOP_modf = 165, + IOP_msad4 = 166, + IOP_mul = 167, + IOP_normalize = 168, + IOP_or = 169, + IOP_pack_clamp_s8 = 170, + IOP_pack_clamp_u8 = 171, + IOP_pack_s8 = 172, + IOP_pack_u8 = 173, + IOP_pow = 174, + IOP_printf = 175, + IOP_radians = 176, + IOP_rcp = 177, + IOP_reflect = 178, + IOP_refract = 179, + IOP_reversebits = 180, + IOP_round = 181, + IOP_rsqrt = 182, + IOP_saturate = 183, + IOP_select = 184, + IOP_sign = 185, + IOP_sin = 186, + IOP_sincos = 187, + IOP_sinh = 188, + IOP_smoothstep = 189, + IOP_source_mark = 190, + IOP_sqrt = 191, + IOP_step = 192, + IOP_tan = 193, + IOP_tanh = 194, + IOP_tex1D = 195, + IOP_tex1Dbias = 196, + IOP_tex1Dgrad = 197, + IOP_tex1Dlod = 198, + IOP_tex1Dproj = 199, + IOP_tex2D = 200, + IOP_tex2Dbias = 201, + IOP_tex2Dgrad = 202, + IOP_tex2Dlod = 203, + IOP_tex2Dproj = 204, + IOP_tex3D = 205, + IOP_tex3Dbias = 206, + IOP_tex3Dgrad = 207, + IOP_tex3Dlod = 208, + IOP_tex3Dproj = 209, + IOP_texCUBE = 210, + IOP_texCUBEbias = 211, + IOP_texCUBEgrad = 212, + IOP_texCUBElod = 213, + IOP_texCUBEproj = 214, + IOP_transpose = 215, + IOP_trunc = 216, + IOP_unpack_s8s16 = 217, + IOP_unpack_s8s32 = 218, + IOP_unpack_u8u16 = 219, + IOP_unpack_u8u32 = 220, + IOP_VkRawBufferLoad = 221, + IOP_VkRawBufferStore = 222, + IOP_VkReadClock = 223, + IOP_Vkext_execution_mode = 224, + IOP_Vkext_execution_mode_id = 225, + MOP_Append = 226, + MOP_RestartStrip = 227, + MOP_CalculateLevelOfDetail = 228, + MOP_CalculateLevelOfDetailUnclamped = 229, + MOP_GetDimensions = 230, + MOP_Load = 231, + MOP_Sample = 232, + MOP_SampleBias = 233, + MOP_SampleCmp = 234, + MOP_SampleCmpBias = 235, + MOP_SampleCmpGrad = 
236, + MOP_SampleCmpLevel = 237, + MOP_SampleCmpLevelZero = 238, + MOP_SampleGrad = 239, + MOP_SampleLevel = 240, + MOP_Gather = 241, + MOP_GatherAlpha = 242, + MOP_GatherBlue = 243, + MOP_GatherCmp = 244, + MOP_GatherCmpAlpha = 245, + MOP_GatherCmpBlue = 246, + MOP_GatherCmpGreen = 247, + MOP_GatherCmpRed = 248, + MOP_GatherGreen = 249, + MOP_GatherRaw = 250, + MOP_GatherRed = 251, + MOP_GetSamplePosition = 252, + MOP_Load2 = 253, + MOP_Load3 = 254, + MOP_Load4 = 255, + MOP_InterlockedAdd = 256, + MOP_InterlockedAdd64 = 257, + MOP_InterlockedAnd = 258, + MOP_InterlockedAnd64 = 259, + MOP_InterlockedCompareExchange = 260, + MOP_InterlockedCompareExchange64 = 261, + MOP_InterlockedCompareExchangeFloatBitwise = 262, + MOP_InterlockedCompareStore = 263, + MOP_InterlockedCompareStore64 = 264, + MOP_InterlockedCompareStoreFloatBitwise = 265, + MOP_InterlockedExchange = 266, + MOP_InterlockedExchange64 = 267, + MOP_InterlockedExchangeFloat = 268, + MOP_InterlockedMax = 269, + MOP_InterlockedMax64 = 270, + MOP_InterlockedMin = 271, + MOP_InterlockedMin64 = 272, + MOP_InterlockedOr = 273, + MOP_InterlockedOr64 = 274, + MOP_InterlockedXor = 275, + MOP_InterlockedXor64 = 276, + MOP_Store = 277, + MOP_Store2 = 278, + MOP_Store3 = 279, + MOP_Store4 = 280, + MOP_DecrementCounter = 281, + MOP_IncrementCounter = 282, + MOP_Consume = 283, + MOP_WriteSamplerFeedback = 284, + MOP_WriteSamplerFeedbackBias = 285, + MOP_WriteSamplerFeedbackGrad = 286, + MOP_WriteSamplerFeedbackLevel = 287, + MOP_Abort = 288, + MOP_CandidateGeometryIndex = 289, + MOP_CandidateInstanceContributionToHitGroupIndex = 290, + MOP_CandidateInstanceID = 291, + MOP_CandidateInstanceIndex = 292, + MOP_CandidateObjectRayDirection = 293, + MOP_CandidateObjectRayOrigin = 294, + MOP_CandidateObjectToWorld3x4 = 295, + MOP_CandidateObjectToWorld4x3 = 296, + MOP_CandidatePrimitiveIndex = 297, + MOP_CandidateProceduralPrimitiveNonOpaque = 298, + MOP_CandidateTriangleBarycentrics = 299, + MOP_CandidateTriangleFrontFace = 300, + MOP_CandidateTriangleRayT = 301, + MOP_CandidateType = 302, + MOP_CandidateWorldToObject3x4 = 303, + MOP_CandidateWorldToObject4x3 = 304, + MOP_CommitNonOpaqueTriangleHit = 305, + MOP_CommitProceduralPrimitiveHit = 306, + MOP_CommittedGeometryIndex = 307, + MOP_CommittedInstanceContributionToHitGroupIndex = 308, + MOP_CommittedInstanceID = 309, + MOP_CommittedInstanceIndex = 310, + MOP_CommittedObjectRayDirection = 311, + MOP_CommittedObjectRayOrigin = 312, + MOP_CommittedObjectToWorld3x4 = 313, + MOP_CommittedObjectToWorld4x3 = 314, + MOP_CommittedPrimitiveIndex = 315, + MOP_CommittedRayT = 316, + MOP_CommittedStatus = 317, + MOP_CommittedTriangleBarycentrics = 318, + MOP_CommittedTriangleFrontFace = 319, + MOP_CommittedWorldToObject3x4 = 320, + MOP_CommittedWorldToObject4x3 = 321, + MOP_Proceed = 322, + MOP_RayFlags = 323, + MOP_RayTMin = 324, + MOP_TraceRayInline = 325, + MOP_WorldRayDirection = 326, + MOP_WorldRayOrigin = 327, + MOP_Count = 328, + MOP_FinishedCrossGroupSharing = 329, + MOP_GetGroupNodeOutputRecords = 330, + MOP_GetThreadNodeOutputRecords = 331, + MOP_IsValid = 332, + MOP_GroupIncrementOutputCount = 333, + MOP_ThreadIncrementOutputCount = 334, + MOP_OutputComplete = 335, + MOP_SubpassLoad = 336, // unsigned - IOP_InterlockedUMax, - IOP_InterlockedUMin, - IOP_WaveActiveUMax, - IOP_WaveActiveUMin, - IOP_WaveActiveUProduct, - IOP_WaveActiveUSum, - IOP_WaveMultiPrefixUProduct, - IOP_WaveMultiPrefixUSum, - IOP_WavePrefixUProduct, - IOP_WavePrefixUSum, - IOP_uabs, - IOP_uclamp, - IOP_udot, - 
IOP_ufirstbithigh, - IOP_umad, - IOP_umax, - IOP_umin, - IOP_umul, - IOP_usign, - MOP_InterlockedUMax, - MOP_InterlockedUMin, - Num_Intrinsics, + IOP_InterlockedUMax = 337, + IOP_InterlockedUMin = 338, + IOP_WaveActiveUMax = 339, + IOP_WaveActiveUMin = 340, + IOP_WaveActiveUProduct = 341, + IOP_WaveActiveUSum = 342, + IOP_WaveMultiPrefixUProduct = 343, + IOP_WaveMultiPrefixUSum = 344, + IOP_WavePrefixUProduct = 345, + IOP_WavePrefixUSum = 346, + IOP_uabs = 347, + IOP_uclamp = 348, + IOP_udot = 349, + IOP_ufirstbithigh = 350, + IOP_umad = 351, + IOP_umax = 352, + IOP_umin = 353, + IOP_umul = 354, + IOP_usign = 355, + MOP_InterlockedUMax = 356, + MOP_InterlockedUMin = 357, + Num_Intrinsics = 358, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 9c3ad76b92..80d3af4147 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6156,7 +6156,6 @@ Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, } // SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, HLOperationLowerHelper &helper, @@ -6166,7 +6165,6 @@ Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic."); return nullptr; } -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, @@ -6511,7 +6509,6 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, -#ifdef ENABLE_SPIRV_CODEGEN {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic, @@ -6522,7 +6519,6 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream}, {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream}, {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, @@ -6750,11 +6746,9 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, DXIL::OpCode::OutputComplete}, -// SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN + // SPIRV change starts {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends // Manually added part. diff --git a/utils/hct/CMakeLists.txt b/utils/hct/CMakeLists.txt new file mode 100644 index 0000000000..41e6b494e6 --- /dev/null +++ b/utils/hct/CMakeLists.txt @@ -0,0 +1,3 @@ +# generate hlsl_intrinsic_opcodes.json to preserve high level intrinsic opcodes +# This uses CODE_TAG because the file exists in the source tree. 
+add_hlsl_hctgen(HlslIntrinsicOpcodes OUTPUT hlsl_intrinsic_opcodes.json CODE_TAG) diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 1c3fd0f717..6f4611db32 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8254,6 +8254,8 @@ def __init__( self.vulkanSpecific = ns.startswith( "Vk" ) # Vulkan specific intrinsic - SPIRV change + self.opcode = None # high-level opcode assigned later + self.unsigned_opcode = None # unsigned high-level opcode if appicable class db_hlsl_namespace(object): @@ -8295,11 +8297,10 @@ def __init__( self.template_id_idx = template_id_idx # Template ID numeric value self.component_id_idx = component_id_idx # Component ID numeric value - class db_hlsl(object): "A database of HLSL language data" - def __init__(self, intrinsic_defs): + def __init__(self, intrinsic_defs, opcode_data): self.base_types = { "bool": "LICOMPTYPE_BOOL", "int": "LICOMPTYPE_INT", @@ -8372,6 +8373,13 @@ def __init__(self, intrinsic_defs): self.populate_attributes() self.opcode_namespace = "hlsl::IntrinsicOp" + # Populate opcode data for HLSL intrinsics. + self.opcode_data = opcode_data + # If opcode data is empty, create the default structure. + if not self.opcode_data: + self.opcode_data["IntrinsicOpCodes"] = {"Num_Intrinsics": 0} + self.assign_opcodes() + def create_namespaces(self): last_ns = None self.namespaces = {} @@ -8898,6 +8906,29 @@ def add_attr_arg(title_name, scope, args, doc): ) self.attributes = attributes + # Iterate through all intrinsics, assigning opcodes to each one. + # This uses the opcode_data to preserve already-assigned opcodes. + def assign_opcodes(self): + "Assign opcodes to the intrinsics." + IntrinsicOpDict = self.opcode_data["IntrinsicOpCodes"] + Num_Intrinsics = self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] + + def add_intrinsic(name): + nonlocal Num_Intrinsics + opcode = IntrinsicOpDict.setdefault(name, Num_Intrinsics) + if opcode == Num_Intrinsics: + Num_Intrinsics += 1 + return opcode + + sorted_intrinsics = sorted(self.intrinsics, key=lambda x: x.key) + for i in sorted_intrinsics: + i.opcode = add_intrinsic(i.enum_name) + for i in sorted_intrinsics: + if i.unsigned_op == "": + continue + i.unsigned_opcode = add_intrinsic(i.unsigned_op) + self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] = Num_Intrinsics + if __name__ == "__main__": db = db_dxil() diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 353f8f9634..2a0359d274 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -18,15 +18,36 @@ def get_db_dxil(): return g_db_dxil -g_db_hlsl = None +# opcode data contains fixed opcode assignments for HLSL intrinsics. +g_hlsl_opcode_data = None + + +def get_hlsl_opcode_data(): + global g_hlsl_opcode_data + if g_hlsl_opcode_data is None: + # Load the intrinsic opcodes from the JSON file. 
+ json_filepath = os.path.join( + os.path.dirname(__file__), "hlsl_intrinsic_opcodes.json" + ) + try: + with open(json_filepath, "r") as file: + g_hlsl_opcode_data = json.load(file) + except FileNotFoundError: + print(f"File not found: {json_filepath}") + except json.JSONDecodeError as e: + print(f"Error decoding JSON from {json_filepath}: {e}") + if not g_hlsl_opcode_data: + g_hlsl_opcode_data = {} + return g_hlsl_opcode_data +g_db_hlsl = None def get_db_hlsl(): global g_db_hlsl if g_db_hlsl is None: thisdir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(thisdir, "gen_intrin_main.txt"), "r") as f: - g_db_hlsl = db_hlsl(f) + g_db_hlsl = db_hlsl(f, get_hlsl_opcode_data()) return g_db_hlsl @@ -1055,22 +1076,22 @@ def wrap_with_ifdef_if_vulkan_specific(intrinsic, text): def enum_hlsl_intrinsics(): db = get_db_hlsl() result = "" - enumed = [] + enumed = set() for i in sorted(db.intrinsics, key=lambda x: x.key): if i.enum_name not in enumed: - enumerant = " %s,\n" % (i.enum_name) - result += wrap_with_ifdef_if_vulkan_specific(i, enumerant) # SPIRV Change - enumed.append(i.enum_name) + result += " %s = %d,\n" % (i.enum_name, i.opcode) + enumed.add(i.enum_name) # unsigned result += " // unsigned\n" for i in sorted(db.intrinsics, key=lambda x: x.key): if i.unsigned_op != "": if i.unsigned_op not in enumed: - result += " %s,\n" % (i.unsigned_op) - enumed.append(i.unsigned_op) + result += " %s = %d,\n" % (i.unsigned_op, i.unsigned_opcode) + enumed.add(i.unsigned_op) - result += " Num_Intrinsics,\n" + Num_Intrinsics = get_hlsl_opcode_data()["IntrinsicOpCodes"]["Num_Intrinsics"] + result += " Num_Intrinsics = %d,\n" % (Num_Intrinsics) return result diff --git a/utils/hct/hctgen.py b/utils/hct/hctgen.py index dbb7e3a745..1421fbfad5 100755 --- a/utils/hct/hctgen.py +++ b/utils/hct/hctgen.py @@ -2,6 +2,7 @@ import argparse from hctdb_instrhelp import * from hctdb import * +import json import sys import os import CodeTags @@ -28,6 +29,7 @@ "DxilCounters", "DxilMetadata", "RDAT_LibraryTypes", + "HlslIntrinsicOpcodes", ], ) parser.add_argument("--output", required=True) @@ -232,6 +234,14 @@ def writeDxilPIXPasses(args): return 0 +def writeHlslIntrinsicOpcodes(args): + out = openOutput(args) + # get_db_hlsl() initializes the hlsl intrinsic database and opcode_data. 
+ get_db_hlsl() + json.dump(get_hlsl_opcode_data(), out, indent=2) + out.write("\n") + return 0 + args = parser.parse_args() if args.force_lf and args.force_crlf: eprint("--force-lf and --force-crlf are mutually exclusive, only pass one") diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json new file mode 100644 index 0000000000..48a0b74c17 --- /dev/null +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -0,0 +1,363 @@ +{ + "IntrinsicOpCodes": { + "Num_Intrinsics": 358, + "IOP_AcceptHitAndEndSearch": 0, + "IOP_AddUint64": 1, + "IOP_AllMemoryBarrier": 2, + "IOP_AllMemoryBarrierWithGroupSync": 3, + "IOP_AllocateRayQuery": 4, + "IOP_Barrier": 5, + "IOP_CallShader": 6, + "IOP_CheckAccessFullyMapped": 7, + "IOP_CreateResourceFromHeap": 8, + "IOP_D3DCOLORtoUBYTE4": 9, + "IOP_DeviceMemoryBarrier": 10, + "IOP_DeviceMemoryBarrierWithGroupSync": 11, + "IOP_DispatchMesh": 12, + "IOP_DispatchRaysDimensions": 13, + "IOP_DispatchRaysIndex": 14, + "IOP_EvaluateAttributeAtSample": 15, + "IOP_EvaluateAttributeCentroid": 16, + "IOP_EvaluateAttributeSnapped": 17, + "IOP_GeometryIndex": 18, + "IOP_GetAttributeAtVertex": 19, + "IOP_GetRemainingRecursionLevels": 20, + "IOP_GetRenderTargetSampleCount": 21, + "IOP_GetRenderTargetSamplePosition": 22, + "IOP_GroupMemoryBarrier": 23, + "IOP_GroupMemoryBarrierWithGroupSync": 24, + "IOP_HitKind": 25, + "IOP_IgnoreHit": 26, + "IOP_InstanceID": 27, + "IOP_InstanceIndex": 28, + "IOP_InterlockedAdd": 29, + "IOP_InterlockedAnd": 30, + "IOP_InterlockedCompareExchange": 31, + "IOP_InterlockedCompareExchangeFloatBitwise": 32, + "IOP_InterlockedCompareStore": 33, + "IOP_InterlockedCompareStoreFloatBitwise": 34, + "IOP_InterlockedExchange": 35, + "IOP_InterlockedMax": 36, + "IOP_InterlockedMin": 37, + "IOP_InterlockedOr": 38, + "IOP_InterlockedXor": 39, + "IOP_IsHelperLane": 40, + "IOP_NonUniformResourceIndex": 41, + "IOP_ObjectRayDirection": 42, + "IOP_ObjectRayOrigin": 43, + "IOP_ObjectToWorld": 44, + "IOP_ObjectToWorld3x4": 45, + "IOP_ObjectToWorld4x3": 46, + "IOP_PrimitiveIndex": 47, + "IOP_Process2DQuadTessFactorsAvg": 48, + "IOP_Process2DQuadTessFactorsMax": 49, + "IOP_Process2DQuadTessFactorsMin": 50, + "IOP_ProcessIsolineTessFactors": 51, + "IOP_ProcessQuadTessFactorsAvg": 52, + "IOP_ProcessQuadTessFactorsMax": 53, + "IOP_ProcessQuadTessFactorsMin": 54, + "IOP_ProcessTriTessFactorsAvg": 55, + "IOP_ProcessTriTessFactorsMax": 56, + "IOP_ProcessTriTessFactorsMin": 57, + "IOP_QuadAll": 58, + "IOP_QuadAny": 59, + "IOP_QuadReadAcrossDiagonal": 60, + "IOP_QuadReadAcrossX": 61, + "IOP_QuadReadAcrossY": 62, + "IOP_QuadReadLaneAt": 63, + "IOP_RayFlags": 64, + "IOP_RayTCurrent": 65, + "IOP_RayTMin": 66, + "IOP_ReportHit": 67, + "IOP_SetMeshOutputCounts": 68, + "IOP_TraceRay": 69, + "IOP_WaveActiveAllEqual": 70, + "IOP_WaveActiveAllTrue": 71, + "IOP_WaveActiveAnyTrue": 72, + "IOP_WaveActiveBallot": 73, + "IOP_WaveActiveBitAnd": 74, + "IOP_WaveActiveBitOr": 75, + "IOP_WaveActiveBitXor": 76, + "IOP_WaveActiveCountBits": 77, + "IOP_WaveActiveMax": 78, + "IOP_WaveActiveMin": 79, + "IOP_WaveActiveProduct": 80, + "IOP_WaveActiveSum": 81, + "IOP_WaveGetLaneCount": 82, + "IOP_WaveGetLaneIndex": 83, + "IOP_WaveIsFirstLane": 84, + "IOP_WaveMatch": 85, + "IOP_WaveMultiPrefixBitAnd": 86, + "IOP_WaveMultiPrefixBitOr": 87, + "IOP_WaveMultiPrefixBitXor": 88, + "IOP_WaveMultiPrefixCountBits": 89, + "IOP_WaveMultiPrefixProduct": 90, + "IOP_WaveMultiPrefixSum": 91, + "IOP_WavePrefixCountBits": 92, + "IOP_WavePrefixProduct": 93, + "IOP_WavePrefixSum": 94, + 
"IOP_WaveReadLaneAt": 95, + "IOP_WaveReadLaneFirst": 96, + "IOP_WorldRayDirection": 97, + "IOP_WorldRayOrigin": 98, + "IOP_WorldToObject": 99, + "IOP_WorldToObject3x4": 100, + "IOP_WorldToObject4x3": 101, + "IOP_abort": 102, + "IOP_abs": 103, + "IOP_acos": 104, + "IOP_all": 105, + "IOP_and": 106, + "IOP_any": 107, + "IOP_asdouble": 108, + "IOP_asfloat": 109, + "IOP_asfloat16": 110, + "IOP_asin": 111, + "IOP_asint": 112, + "IOP_asint16": 113, + "IOP_asuint": 114, + "IOP_asuint16": 115, + "IOP_atan": 116, + "IOP_atan2": 117, + "IOP_ceil": 118, + "IOP_clamp": 119, + "IOP_clip": 120, + "IOP_cos": 121, + "IOP_cosh": 122, + "IOP_countbits": 123, + "IOP_cross": 124, + "IOP_ddx": 125, + "IOP_ddx_coarse": 126, + "IOP_ddx_fine": 127, + "IOP_ddy": 128, + "IOP_ddy_coarse": 129, + "IOP_ddy_fine": 130, + "IOP_degrees": 131, + "IOP_determinant": 132, + "IOP_distance": 133, + "IOP_dot": 134, + "IOP_dot2add": 135, + "IOP_dot4add_i8packed": 136, + "IOP_dot4add_u8packed": 137, + "IOP_dst": 138, + "IOP_exp": 139, + "IOP_exp2": 140, + "IOP_f16tof32": 141, + "IOP_f32tof16": 142, + "IOP_faceforward": 143, + "IOP_firstbithigh": 144, + "IOP_firstbitlow": 145, + "IOP_floor": 146, + "IOP_fma": 147, + "IOP_fmod": 148, + "IOP_frac": 149, + "IOP_frexp": 150, + "IOP_fwidth": 151, + "IOP_isfinite": 152, + "IOP_isinf": 153, + "IOP_isnan": 154, + "IOP_ldexp": 155, + "IOP_length": 156, + "IOP_lerp": 157, + "IOP_lit": 158, + "IOP_log": 159, + "IOP_log10": 160, + "IOP_log2": 161, + "IOP_mad": 162, + "IOP_max": 163, + "IOP_min": 164, + "IOP_modf": 165, + "IOP_msad4": 166, + "IOP_mul": 167, + "IOP_normalize": 168, + "IOP_or": 169, + "IOP_pack_clamp_s8": 170, + "IOP_pack_clamp_u8": 171, + "IOP_pack_s8": 172, + "IOP_pack_u8": 173, + "IOP_pow": 174, + "IOP_printf": 175, + "IOP_radians": 176, + "IOP_rcp": 177, + "IOP_reflect": 178, + "IOP_refract": 179, + "IOP_reversebits": 180, + "IOP_round": 181, + "IOP_rsqrt": 182, + "IOP_saturate": 183, + "IOP_select": 184, + "IOP_sign": 185, + "IOP_sin": 186, + "IOP_sincos": 187, + "IOP_sinh": 188, + "IOP_smoothstep": 189, + "IOP_source_mark": 190, + "IOP_sqrt": 191, + "IOP_step": 192, + "IOP_tan": 193, + "IOP_tanh": 194, + "IOP_tex1D": 195, + "IOP_tex1Dbias": 196, + "IOP_tex1Dgrad": 197, + "IOP_tex1Dlod": 198, + "IOP_tex1Dproj": 199, + "IOP_tex2D": 200, + "IOP_tex2Dbias": 201, + "IOP_tex2Dgrad": 202, + "IOP_tex2Dlod": 203, + "IOP_tex2Dproj": 204, + "IOP_tex3D": 205, + "IOP_tex3Dbias": 206, + "IOP_tex3Dgrad": 207, + "IOP_tex3Dlod": 208, + "IOP_tex3Dproj": 209, + "IOP_texCUBE": 210, + "IOP_texCUBEbias": 211, + "IOP_texCUBEgrad": 212, + "IOP_texCUBElod": 213, + "IOP_texCUBEproj": 214, + "IOP_transpose": 215, + "IOP_trunc": 216, + "IOP_unpack_s8s16": 217, + "IOP_unpack_s8s32": 218, + "IOP_unpack_u8u16": 219, + "IOP_unpack_u8u32": 220, + "IOP_VkRawBufferLoad": 221, + "IOP_VkRawBufferStore": 222, + "IOP_VkReadClock": 223, + "IOP_Vkext_execution_mode": 224, + "IOP_Vkext_execution_mode_id": 225, + "MOP_Append": 226, + "MOP_RestartStrip": 227, + "MOP_CalculateLevelOfDetail": 228, + "MOP_CalculateLevelOfDetailUnclamped": 229, + "MOP_GetDimensions": 230, + "MOP_Load": 231, + "MOP_Sample": 232, + "MOP_SampleBias": 233, + "MOP_SampleCmp": 234, + "MOP_SampleCmpBias": 235, + "MOP_SampleCmpGrad": 236, + "MOP_SampleCmpLevel": 237, + "MOP_SampleCmpLevelZero": 238, + "MOP_SampleGrad": 239, + "MOP_SampleLevel": 240, + "MOP_Gather": 241, + "MOP_GatherAlpha": 242, + "MOP_GatherBlue": 243, + "MOP_GatherCmp": 244, + "MOP_GatherCmpAlpha": 245, + "MOP_GatherCmpBlue": 246, + "MOP_GatherCmpGreen": 247, + 
"MOP_GatherCmpRed": 248, + "MOP_GatherGreen": 249, + "MOP_GatherRaw": 250, + "MOP_GatherRed": 251, + "MOP_GetSamplePosition": 252, + "MOP_Load2": 253, + "MOP_Load3": 254, + "MOP_Load4": 255, + "MOP_InterlockedAdd": 256, + "MOP_InterlockedAdd64": 257, + "MOP_InterlockedAnd": 258, + "MOP_InterlockedAnd64": 259, + "MOP_InterlockedCompareExchange": 260, + "MOP_InterlockedCompareExchange64": 261, + "MOP_InterlockedCompareExchangeFloatBitwise": 262, + "MOP_InterlockedCompareStore": 263, + "MOP_InterlockedCompareStore64": 264, + "MOP_InterlockedCompareStoreFloatBitwise": 265, + "MOP_InterlockedExchange": 266, + "MOP_InterlockedExchange64": 267, + "MOP_InterlockedExchangeFloat": 268, + "MOP_InterlockedMax": 269, + "MOP_InterlockedMax64": 270, + "MOP_InterlockedMin": 271, + "MOP_InterlockedMin64": 272, + "MOP_InterlockedOr": 273, + "MOP_InterlockedOr64": 274, + "MOP_InterlockedXor": 275, + "MOP_InterlockedXor64": 276, + "MOP_Store": 277, + "MOP_Store2": 278, + "MOP_Store3": 279, + "MOP_Store4": 280, + "MOP_DecrementCounter": 281, + "MOP_IncrementCounter": 282, + "MOP_Consume": 283, + "MOP_WriteSamplerFeedback": 284, + "MOP_WriteSamplerFeedbackBias": 285, + "MOP_WriteSamplerFeedbackGrad": 286, + "MOP_WriteSamplerFeedbackLevel": 287, + "MOP_Abort": 288, + "MOP_CandidateGeometryIndex": 289, + "MOP_CandidateInstanceContributionToHitGroupIndex": 290, + "MOP_CandidateInstanceID": 291, + "MOP_CandidateInstanceIndex": 292, + "MOP_CandidateObjectRayDirection": 293, + "MOP_CandidateObjectRayOrigin": 294, + "MOP_CandidateObjectToWorld3x4": 295, + "MOP_CandidateObjectToWorld4x3": 296, + "MOP_CandidatePrimitiveIndex": 297, + "MOP_CandidateProceduralPrimitiveNonOpaque": 298, + "MOP_CandidateTriangleBarycentrics": 299, + "MOP_CandidateTriangleFrontFace": 300, + "MOP_CandidateTriangleRayT": 301, + "MOP_CandidateType": 302, + "MOP_CandidateWorldToObject3x4": 303, + "MOP_CandidateWorldToObject4x3": 304, + "MOP_CommitNonOpaqueTriangleHit": 305, + "MOP_CommitProceduralPrimitiveHit": 306, + "MOP_CommittedGeometryIndex": 307, + "MOP_CommittedInstanceContributionToHitGroupIndex": 308, + "MOP_CommittedInstanceID": 309, + "MOP_CommittedInstanceIndex": 310, + "MOP_CommittedObjectRayDirection": 311, + "MOP_CommittedObjectRayOrigin": 312, + "MOP_CommittedObjectToWorld3x4": 313, + "MOP_CommittedObjectToWorld4x3": 314, + "MOP_CommittedPrimitiveIndex": 315, + "MOP_CommittedRayT": 316, + "MOP_CommittedStatus": 317, + "MOP_CommittedTriangleBarycentrics": 318, + "MOP_CommittedTriangleFrontFace": 319, + "MOP_CommittedWorldToObject3x4": 320, + "MOP_CommittedWorldToObject4x3": 321, + "MOP_Proceed": 322, + "MOP_RayFlags": 323, + "MOP_RayTMin": 324, + "MOP_TraceRayInline": 325, + "MOP_WorldRayDirection": 326, + "MOP_WorldRayOrigin": 327, + "MOP_Count": 328, + "MOP_FinishedCrossGroupSharing": 329, + "MOP_GetGroupNodeOutputRecords": 330, + "MOP_GetThreadNodeOutputRecords": 331, + "MOP_IsValid": 332, + "MOP_GroupIncrementOutputCount": 333, + "MOP_ThreadIncrementOutputCount": 334, + "MOP_OutputComplete": 335, + "MOP_SubpassLoad": 336, + "IOP_InterlockedUMax": 337, + "IOP_InterlockedUMin": 338, + "IOP_WaveActiveUMax": 339, + "IOP_WaveActiveUMin": 340, + "IOP_WaveActiveUProduct": 341, + "IOP_WaveActiveUSum": 342, + "IOP_WaveMultiPrefixUProduct": 343, + "IOP_WaveMultiPrefixUSum": 344, + "IOP_WavePrefixUProduct": 345, + "IOP_WavePrefixUSum": 346, + "IOP_uabs": 347, + "IOP_uclamp": 348, + "IOP_udot": 349, + "IOP_ufirstbithigh": 350, + "IOP_umad": 351, + "IOP_umax": 352, + "IOP_umin": 353, + "IOP_umul": 354, + "IOP_usign": 355, + 
"MOP_InterlockedUMax": 356, + "MOP_InterlockedUMin": 357 + } +} From 9e8a698deed37116e5e55cebd0d725c5c2be5e4c Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 20 Mar 2025 10:10:48 -0700 Subject: [PATCH 40/88] Lower RayQuery constructor to allocateRayQuery2 (#7205) This PR connects the front end change to the back end change, by taking the existing rayquery constructor translation, and augmenting it so that allocaterayquery2 can be emitted as an opcode if there are 2 template arguments. It is independent of the shader model. If 2 template args are detected, and the 2nd template argument has a non-zero value, it just emits allocateRayQuery2. A test was added to make sure that when targeting shader model 6.9, using 2 template args where the 2nd arg is non-zero in a rayquery declaration will produce an allocateRayQuery2 opcode. Fixes [#7136](https://github.com/microsoft/DirectXShaderCompiler/issues/7136) --- include/dxc/HLSL/HLOperations.h | 4 + lib/DXIL/DxilShaderFlags.cpp | 1 + lib/HLSL/HLOperationLower.cpp | 20 ++- .../lib/CodeGen/CGHLSLMSFinishCodeGen.cpp | 23 +++- .../objects/RayQuery/allocateRayQuery2.hlsl | 23 ++++ .../Passes/DxilGen/LowerAllocateRayQuery2.ll | 118 ++++++++++++++++++ utils/hct/gen_intrin_main.txt | 4 +- 7 files changed, 183 insertions(+), 10 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 1ccb7f04a2..f87d324baf 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -398,6 +398,10 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3; const unsigned kTraceRayRayDescOpIdx = 7; const unsigned kTraceRayPayLoadOpIdx = 8; +// AllocateRayQuery +const unsigned kAllocateRayQueryRayFlagsIdx = 1; +const unsigned kAllocateRayQueryRayQueryFlagsIdx = 2; + // CallShader. 
const unsigned kCallShaderPayloadOpIdx = 2; diff --git a/lib/DXIL/DxilShaderFlags.cpp b/lib/DXIL/DxilShaderFlags.cpp index 7d0799dc64..993038aaf1 100644 --- a/lib/DXIL/DxilShaderFlags.cpp +++ b/lib/DXIL/DxilShaderFlags.cpp @@ -637,6 +637,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F, hasViewID = true; break; case DXIL::OpCode::AllocateRayQuery: + case DXIL::OpCode::AllocateRayQuery2: case DXIL::OpCode::GeometryIndex: hasRaytracingTier1_1 = true; break; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 80d3af4147..96ebda43ac 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -5670,7 +5670,24 @@ Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *refArgs[] = {nullptr, CI->getOperand(1)}; + // upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg + DXASSERT(CI->getNumArgOperands() == 3, + "hlopcode for allocaterayquery always expects 3 arguments"); + + llvm::Value *Arg = + CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx); + llvm::ConstantInt *ConstVal = llvm::dyn_cast(Arg); + DXASSERT(ConstVal, + "2nd argument to allocaterayquery must always be a constant value"); + if (ConstVal->getValue().getZExtValue() != 0) { + Value *refArgs[3] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx), + CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)}; + opcode = OP::OpCode::AllocateRayQuery2; + return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); + } + Value *refArgs[2] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)}; return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); } @@ -5679,7 +5696,6 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *opArg = hlslOP->GetU32Const(static_cast(opcode)); Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 16f268f102..532ec01458 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -2795,10 +2795,12 @@ unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size, } // Translate RayQuery constructor. 
From: -// %call = call %"RayQuery" @(%"RayQuery" %ptr) +// %call = call %"RayQuery>" +// @(%"RayQuery" %ptr) // To: -// i32 %handle = AllocateRayQuery(i32 , i32 -// %flags) %gep = GEP %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 +// i32 %handle = AllocateRayQuery2(i32 , i32 +// %flags, i32 %constrayqueryflags <0 if not given>) %gep = GEP +// %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 // %handle ; and replace uses of %call with %ptr void TranslateRayQueryConstructor(HLModule &HLM) { llvm::Module &M = *HLM.GetModule(); @@ -2822,9 +2824,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) { llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext()); llvm::ConstantInt *i32Zero = llvm::ConstantInt::get(i32Ty, (uint64_t)0, false); + + // the third argument will default to 0 if the rayquery constructor doesn't + // have a second template argument llvm::FunctionType *funcTy = - llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false); + llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty, i32Ty}, false); unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery; + llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false); Function *opFunc = GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode); @@ -2848,8 +2854,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) { llvm::IRBuilder<> Builder(CI); llvm::Value *rayFlags = Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral()); - llvm::Value *Call = - Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName()); + // the default val of 0 will be assigned if there is no 2nd template arg + llvm::Value *rayQueryFlags = + Builder.getInt32(SA->GetTemplateArgAnnotation(1).GetIntegral()); + + llvm::Value *Call = Builder.CreateCall( + opFunc, {opVal, rayFlags, rayQueryFlags}, pThis->getName()); + llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero}); Builder.CreateStore(Call, GEP); CI->replaceAllUsesWith(pThis); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl new file mode 100644 index 0000000000..de79a2f481 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl @@ -0,0 +1,23 @@ +// REQUIRES: dxil-1-9 +// RUN: %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -T lib_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + +// RUN: %dxc -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -T vs_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + + +RaytracingAccelerationStructure RTAS; +[shader("vertex")] +void main(RayDesc rayDesc : RAYDESC) { + + // CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1) + RayQuery rayQuery1; + + rayQuery1.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0) + RayQuery rayQuery2; + rayQuery2.TraceRayInline(RTAS, 0, 2, rayDesc); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll new file mode 100644 index 0000000000..ab86452b17 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll @@ -0,0 +1,118 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; generated the IR with: +; ExtractIRForPassTest.py -p dxilgen -o LowerAllocateRayQuery2.ll 
tools\clang\test\CodeGenDXIL\hlsl\objects\RayQuery\allocateRayQuery2.hlsl -- -T vs_6_9 +; Importantly, extraction took place with spirv code-gen enabled + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<1024, 1>" = type { i32 } +%"class.RayQuery<1, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +define void @main(<3 x float>, float, <3 x float>, float) #0 { +entry: + ; CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + %rayQuery12 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1), !dbg !42 ; line:15 col:79 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !46 ; line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !46 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !46 ; line:17 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery12, %dx.types.Handle %6, i32 1024, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !46 ; line:17 col:3 + + ; CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + %rayQuery23 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0), !dbg !47 ; line:21 col:35 + %7 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !48 ; line:22 col:3 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %7), !dbg !48 ; line:22 col:3 + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !48 ; line:22 col:3 + call 
void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery23, %dx.types.Handle %9, i32 0, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !48 ; line:22 col:3 + ret void, !dbg !49 ; line:23 col:1 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !21} +!dx.entryPoints = !{!34} +!dx.fnprops = !{!39} +!dx.options = !{!40, !41} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4853 (lowerOMM, ca5df957eb33-dirty)"} +!3 = !{i32 1, i32 9} +!4 = !{!"vs", i32 6, i32 9} +!5 = !{i32 0, %struct.RayDesc undef, !6, %"class.RayQuery<1024, 1>" undef, !11, %"class.RayQuery<1, 0>" undef, !17} +!6 = !{i32 32, !7, !8, !9, !10} +!7 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!8 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!9 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!10 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!11 = !{i32 4, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!13 = !{i32 0, !14} +!14 = !{!15, !16} +!15 = !{i32 1, i64 1024} +!16 = !{i32 1, i64 1} +!17 = !{i32 4, !12, !18} +!18 = !{i32 0, !19} +!19 = !{!16, !20} +!20 = !{i32 1, i64 0} +!21 = !{i32 1, void (<3 x float>, float, <3 x float>, float)* @main, !22} +!22 = !{!23, !25, !28, !30, !32} +!23 = !{i32 0, !24, !24} +!24 = !{} +!25 = !{i32 0, !26, !27} +!26 = !{i32 4, !"RAYDESC", i32 7, i32 9} +!27 = !{i32 0} +!28 = !{i32 0, !26, !29} +!29 = !{i32 1} +!30 = !{i32 0, !26, !31} +!31 = !{i32 2} +!32 = !{i32 0, !26, !33} +!33 = !{i32 3} +!34 = !{void (<3 x float>, float, <3 x float>, float)* @main, !"main", null, !35, null} +!35 = !{!36, null, null, null} +!36 = !{!37} +!37 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !38} +!38 = !{i32 0, i32 4} +!39 = !{void (<3 x float>, float, <3 x float>, float)* @main, i32 1} +!40 = !{i32 -2147483584} +!41 = !{i32 -1} +!42 = !DILocation(line: 15, column: 79, scope: !43) +!43 = !DISubprogram(name: "main", scope: !44, file: !44, line: 11, type: !45, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void (<3 x float>, float, <3 x float>, float)* @main) +!44 = !DIFile(filename: "tools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CRayQuery\5CallocateRayQuery2.hlsl", directory: "") +!45 = !DISubroutineType(types: !24) +!46 = !DILocation(line: 17, column: 3, scope: !43) +!47 = !DILocation(line: 21, column: 35, scope: !43) +!48 = !DILocation(line: 22, column: 3, scope: !43) +!49 = !DILocation(line: 23, column: 1, scope: !43) diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 7f7637b230..51ea6b3176 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -361,8 +361,8 @@ void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in // Return true if the current lane is a helper lane bool [[ro]] IsHelperLane(); -// HL Op for allocating ray query object that 
default constructor uses -uint [[hidden]] AllocateRayQuery(in uint flags); +// HL Op for allocating ray query object +uint [[hidden]] AllocateRayQuery(in uint flags, in uint rayqueryflags); resource [[hidden]] CreateResourceFromHeap(in uint index); From 8b3fae2f23b946eb47429b3ee432885c2b63301b Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 20 Mar 2025 16:28:41 -0700 Subject: [PATCH 41/88] Use Wide String variants explicitly for Windows API calls (#7235) This PR changes some code in ExecutionTests.cpp to use the wide string variants of Windows API calls explicitly. This is because some internal builds will get confused about which overload to resolve the GetModuleHandle function to. By being explicit, this should eliminate the error that an arg can't be converted to LPCWSTR. --- tools/clang/unittests/HLSLExec/ExecutionTest.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 7066247883..91b42f6b79 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -820,10 +820,10 @@ class ExecutionTest { return false; } - if (GetModuleHandle("d3d10warp.dll") != NULL) { - CHAR szFullModuleFilePath[MAX_PATH] = ""; - GetModuleFileName(GetModuleHandle("d3d10warp.dll"), - szFullModuleFilePath, sizeof(szFullModuleFilePath)); + if (GetModuleHandleW(L"d3d10warp.dll") != NULL) { + WCHAR szFullModuleFilePath[MAX_PATH] = L""; + GetModuleFileNameW(GetModuleHandleW(L"d3d10warp.dll"), + szFullModuleFilePath, sizeof(szFullModuleFilePath)); WEX::Logging::Log::Comment(WEX::Common::String().Format( L"WARP driver loaded from: %S", szFullModuleFilePath)); } From 60e6c76fbad97dd0137385289498dc76ffe7b611 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 20 Mar 2025 19:10:13 -0700 Subject: [PATCH 42/88] Add constraint to test that requires spirv support (#7241) This PR adds a // REQUIRES: spirv line to the top of a test that uses spirv. This prevents failures in dev environments that don't have spirv enabled. --- .../test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl index 4fcce749d7..ece7e3f2f4 100644 --- a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl +++ b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl @@ -1,3 +1,4 @@ +// REQUIRES: spirv // RUN: %dxc -T ps_6_0 -E main -verify -spirv %s struct S From b646ad39c722a43b39d2df4a80d5f118d85a8685 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 21 Mar 2025 13:27:18 -0400 Subject: [PATCH 43/88] [SPIRV] Update submodules and fix test (#7243) Updates the submodules. One test is updated because spirv-opt does not common the load of a sampler anymore to avoid using a value from a different basic block. 
---
 external/SPIRV-Tools | 2 +-
 .../vk.binding.global-struct-of-resource.and.array.hlsl | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools
index f289d047f4..ada1771a9f 160000
--- a/external/SPIRV-Tools
+++ b/external/SPIRV-Tools
@@ -1 +1 @@
-Subproject commit f289d047f49fb60488301ec62bafab85573668cc
+Subproject commit ada1771a9f7a125573aa94fe551fdc44b45769bd
diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl
index 9d226eb962..526bfc002c 100644
--- a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl
@@ -27,6 +27,7 @@ float4 main() : SV_Target
 // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]]
   return Textures[0].Sample(TheStruct.Sampler, float2(0, 0))
 // CHECK: [[tex:%[0-9]+]] = OpLoad %type_2d_image %TheStruct_Texture
+// CHECK: [[smp:%[0-9]+]] = OpLoad %type_sampler %TheStruct_Sampler
 // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]]
     + TheStruct.Texture.Sample(TheStruct.Sampler, float2(0, 0));
 }

From 94596e1c97e10ef2f97cf21d33cbabdc0e7df2e8 Mon Sep 17 00:00:00 2001
From: Steven Perron
Date: Mon, 24 Mar 2025 15:52:46 -0400
Subject: [PATCH 44/88] [SPIRV] Allow sampled type to be half for universal
 (#7252)

We have a check that the sampled type for an image cannot be a 16-bit
float. This is true for Vulkan, but not true for general SPIR-V. We
modify this check to apply only when the target env is Vulkan. We also
move the check to SpirvEmitter, where the error handling is better. In
its old location, the compiler would continue to run with an unexpected
nullptr.

Fixes #6987
Fixes #6989

---------

Co-authored-by: Cassandra Beckley
---
 .../include/clang/SPIRV/FeatureManager.h | 3 +++
 tools/clang/lib/SPIRV/FeatureManager.cpp | 18 ++++++++++++++++++
 tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 20 --------------------
 tools/clang/lib/SPIRV/SpirvEmitter.cpp | 13 +++++++++++++
 .../test/CodeGenSPIRV/type.buffer.half.hlsl | 12 ++++++++++--
 .../test/CodeGenSPIRV/type.buffer.half4.hlsl | 14 ++++++++++++++
 6 files changed, 58 insertions(+), 22 deletions(-)
 create mode 100644 tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl

diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h
index 32ee187091..841708d8d5 100644
--- a/tools/clang/include/clang/SPIRV/FeatureManager.h
+++ b/tools/clang/include/clang/SPIRV/FeatureManager.h
@@ -132,6 +132,9 @@ class FeatureManager {
   /// Returns false otherwise.
   bool isTargetEnvVulkan1p3OrAbove();

+  /// Return true if the target environment is a Vulkan environment.
+  bool isTargetEnvVulkan();
+
   /// Returns the spv_target_env matching the input string if possible.
   /// This functions matches the spv_target_env with the command-line version
   /// of the name ('vulkan1.1', not 'Vulkan 1.1').
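As a rough sketch of the user-visible effect of this change (the shader below is illustrative and is not one of the tests in this patch; the resource and entry-point names are invented, and the flags are taken from the RUN lines of the tests added above), a 16-bit sampled type is now rejected only when compiling for a Vulkan target environment and is accepted for universal SPIR-V:

// Illustrative only; compile with, for example:
//   dxc -T ps_6_6 -E main -spirv -enable-16bit-types shader.hlsl
//     -> error for Vulkan targets (sampled type smaller than 32 bits)
//   dxc -T ps_6_6 -E main -spirv -fspv-target-env=universal1.5 -enable-16bit-types shader.hlsl
//     -> accepted; the image is lowered with a 16-bit sampled type (OpTypeImage %half ...)
Texture2D<half4> Tex : register(t0);
SamplerState Samp : register(s0);

float4 main(float2 uv : TEXCOORD0) : SV_Target {
  return Tex.Sample(Samp, uv);
}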
diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index 2512984a4c..c459f7af0f 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -405,5 +405,23 @@ bool FeatureManager::isTargetEnvVulkan1p3OrAbove() { return targetEnv >= SPV_ENV_VULKAN_1_3; } +bool FeatureManager::isTargetEnvVulkan() { + // This assert ensure that this list will be updated, if necessary, when + // a new target environment is added. + static_assert(SPV_ENV_VULKAN_1_4 + 1 == SPV_ENV_MAX); + + switch (targetEnv) { + case SPV_ENV_VULKAN_1_0: + case SPV_ENV_VULKAN_1_1: + case SPV_ENV_VULKAN_1_2: + case SPV_ENV_VULKAN_1_1_SPIRV_1_4: + case SPV_ENV_VULKAN_1_3: + case SPV_ENV_VULKAN_1_4: + return true; + default: + return false; + } +} + } // end namespace spirv } // end namespace clang diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 24cce9d89e..a5bc4a4aa8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -834,26 +834,6 @@ LowerTypeVisitor::lowerResourceType(QualType type, SpirvLayoutRule rule, // TODO: avoid string comparison once hlsl::IsHLSLResouceType() does that. - // Vulkan does not yet support true 16-bit float texture objexts. - if (name == "Buffer" || name == "RWBuffer" || name == "Texture1D" || - name == "Texture2D" || name == "Texture3D" || name == "TextureCube" || - name == "Texture1DArray" || name == "Texture2DArray" || - name == "Texture2DMS" || name == "Texture2DMSArray" || - name == "TextureCubeArray" || name == "RWTexture1D" || - name == "RWTexture2D" || name == "RWTexture3D" || - name == "RWTexture1DArray" || name == "RWTexture2DArray") { - const auto sampledType = hlsl::GetHLSLResourceResultType(type); - const auto loweredType = - lowerType(getElementType(astContext, sampledType), rule, - /*isRowMajor*/ llvm::None, srcLoc); - if (const auto *floatType = dyn_cast(loweredType)) { - if (floatType->getBitwidth() == 16) { - emitError("16-bit texture types not yet supported with -spirv", srcLoc); - return nullptr; - } - } - } - { // Texture types spv::Dim dim = {}; bool isArray = {}; diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 557768f59a..e1124999ec 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -1880,6 +1880,19 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { } } + if (featureManager.isTargetEnvVulkan() && + (isTexture(decl->getType()) || isRWTexture(decl->getType()) || + isBuffer(decl->getType()) || isRWBuffer(decl->getType()))) { + const auto sampledType = hlsl::GetHLSLResourceResultType(decl->getType()); + if (isFloatOrVecMatOfFloatType(sampledType) && + isOrContains16BitType(sampledType, spirvOptions.enable16BitTypes)) { + emitError("The sampled type for textures cannot be a floating point type " + "smaller than 32-bits when targeting a Vulkan environment.", + loc); + return; + } + } + if (decl->hasAttr()) { // This is a VarDecl for specialization constant. 
createSpecConstant(decl); diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl index e5954abae5..99d365b5e2 100644 --- a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl @@ -1,6 +1,14 @@ -// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL -// CHECK: error: 16-bit texture types not yet supported with -spirv +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. +// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant Buffer MyBuffer; void main(): SV_Target { } diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl new file mode 100644 index 0000000000..f29af69c1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl @@ -0,0 +1,14 @@ +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL + +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. +// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +Buffer MyBuffer; + +void main(): SV_Target { } From 9a06f4d27acdce04b0fcd1c9ffef46eb43b667b8 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 24 Mar 2025 15:54:50 -0700 Subject: [PATCH 45/88] Consolidate buffer store translation (#7251) Added structured and typed buffer support to TranslateStore and used it for all such lowerings. Includes IR and fcgl tests for the same in addition to recently added load/store tests that exercise this same code.
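As an orientation for the diff below, a small HLSL sketch (not part of the patch; resource names and bindings are illustrative) of two store forms that now go through the single TranslateStore path: raw byte-address stores pass a byte index with no offset, while structured-buffer stores pass an element index plus a byte offset, matching the Coord0/Coord1 handling added below.

  RWByteAddressBuffer Bab : register(u0);
  RWStructuredBuffer<float2x2> Mats : register(u1);

  void main(uint ix : IX0) {
    // Raw buffer store: lowered to RawBufferStore addressed by a byte index.
    Bab.Store<float4>(16 * ix, float4(1, 2, 3, 4));
    // Structured-buffer store: lowered to RawBufferStore addressed by an
    // element index plus an offset within the element.
    Mats[ix] = float2x2(1, 2, 3, 4);
  }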
--- lib/HLSL/HLOperationLower.cpp | 175 +-- .../hlsl/intrinsics/buffer-store.hlsl | 192 +++ .../hlsl/intrinsics/buffer-store.ll | 822 +++++++++++++ .../hlsl/intrinsics/buffer-typed-store.hlsl | 404 ++++++ .../hlsl/intrinsics/buffer-typed-store.ll | 1079 +++++++++++++++++ 5 files changed, 2560 insertions(+), 112 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 96ebda43ac..5a0dadf7f4 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4335,18 +4335,15 @@ void Split64bitValForStore(Type *EltTy, ArrayRef vals, unsigned size, } void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, - Value *offset, IRBuilder<> &Builder, hlsl::OP *OP, - Value *sampIdx = nullptr) { + Value *Idx, Value *offset, IRBuilder<> &Builder, + hlsl::OP *OP, Value *sampIdx = nullptr) { Type *Ty = val->getType(); - - // This function is no longer used for lowering stores to a - // structured buffer. - DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer); - OP::OpCode opcode = OP::OpCode::NumOpCodes; + bool IsTyped = true; switch (RK) { case DxilResource::Kind::RawBuffer: case DxilResource::Kind::StructuredBuffer: + IsTyped = false; opcode = OP::OpCode::RawBufferStore; break; case DxilResource::Kind::TypedBuffer: @@ -4364,10 +4361,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, break; } - bool isTyped = opcode == OP::OpCode::TextureStore || - opcode == OP::OpCode::TextureStoreSample || - RK == DxilResource::Kind::TypedBuffer; - Type *i32Ty = Builder.getInt32Ty(); Type *i64Ty = Builder.getInt64Ty(); Type *doubleTy = Builder.getDoubleTy(); @@ -4390,7 +4383,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, alignValue = 4; Constant *Alignment = OP->GetI32Const(alignValue); bool is64 = EltTy == i64Ty || EltTy == doubleTy; - if (is64 && isTyped) { + if (is64 && IsTyped) { EltTy = i32Ty; } @@ -4406,38 +4399,42 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, storeArgs.emplace_back(opArg); // opcode storeArgs.emplace_back(handle); // resource handle - unsigned offset0Idx = 0; - if (RK == DxilResource::Kind::RawBuffer || - RK == DxilResource::Kind::TypedBuffer) { - // Offset 0 - if (offset->getType()->isVectorTy()) { - Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0); - storeArgs.emplace_back(scalarOffset); // offset + unsigned OffsetIdx = 0; + if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::BufferStore) { + // Append Coord0 (Index) value. + if (Idx->getType()->isVectorTy()) { + Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0); + storeArgs.emplace_back(ScalarIdx); // Coord0 (Index). } else { - storeArgs.emplace_back(offset); // offset + storeArgs.emplace_back(Idx); // Coord0 (Index). } - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + // Store OffsetIdx representing the argument that may need to be incremented + // later to load additional chunks of data. + // Only structured buffers can use the offset parameter. + // Others must increment the index. 
+ if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = storeArgs.size(); + else + OffsetIdx = storeArgs.size() - 1; - // Offset 1 - storeArgs.emplace_back(undefI); + // Coord1 (Offset). + // Only relevant when storing more than 4 elements to structured buffers. + storeArgs.emplace_back(offset); } else { // texture store unsigned coordSize = DxilResource::GetNumCoords(RK); // Set x first. - if (offset->getType()->isVectorTy()) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0)); + if (Idx->getType()->isVectorTy()) + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0)); else - storeArgs.emplace_back(offset); - - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + storeArgs.emplace_back(Idx); for (unsigned i = 1; i < 3; i++) { if (i < coordSize) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, i)); + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i)); else storeArgs.emplace_back(undefI); } @@ -4464,30 +4461,24 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } for (unsigned j = 0; j < storeArgsList.size(); j++) { - - // For second and subsequent store calls, increment the offset0 (i.e. store - // index) + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. if (j > 0) { - // Greater than four-components store is not allowed for - // TypedBuffer and Textures. So greater than four elements - // scenario should only get hit here for RawBuffer. - DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer); unsigned EltSize = OP->GetAllocSizeForType(EltTy); - unsigned newOffset = EltSize * MaxStoreElemCount * j; - Value *newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset); - newOffsetVal = - Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal); - storeArgsList[j][offset0Idx] = newOffsetVal; + unsigned NewCoord = EltSize * MaxStoreElemCount * j; + Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord); + NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal); + storeArgsList[j][OffsetIdx] = NewCoordVal; } - // values + // Set value parameters. 
uint8_t mask = 0; if (Ty->isVectorTy()) { unsigned vecSize = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount); Value *emptyVal = undefVal; - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; emptyVal = Builder.CreateExtractElement(val, (uint64_t)0); } @@ -4503,7 +4494,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } else { - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; storeArgsList[j].emplace_back(val); storeArgsList[j].emplace_back(val); @@ -4518,7 +4509,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } - if (is64 && isTyped) { + if (is64 && IsTyped) { unsigned size = 1; if (Ty->isVectorTy()) { size = @@ -4576,7 +4567,8 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); - TranslateStore(RK, handle, val, offset, Builder, hlslOP); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); + TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); return nullptr; } @@ -7907,40 +7899,11 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, hlsl::OP *OP, Value *bufIdx, Value *baseOffset, Value *val, const DataLayout &DL) { - HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - - val = MatTy.emitLoweredRegToMem(val, Builder); - - unsigned EltSize = DL.getTypeAllocSize(EltTy); - Constant *Alignment = OP->GetI32Const(EltSize); - Value *offset = baseOffset; - if (baseOffset == nullptr) - offset = OP->GetU32Const(0); - - unsigned matSize = MatTy.getNumElements(); - Value *undefElt = UndefValue::get(EltTy); - - unsigned storeSize = matSize; - if (matSize % 4) { - storeSize = matSize + 4 - (matSize & 3); - } - std::vector elts(storeSize, undefElt); - for (unsigned i = 0; i < matSize; i++) - elts[i] = Builder.CreateExtractElement(val, i); - - for (unsigned i = 0; i < matSize; i += 4) { - uint8_t mask = 0; - for (unsigned j = 0; j < 4 && (i + j) < matSize; j++) { - if (elts[i + j] != undefElt) - mask |= (1 << j); - } - GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder, - {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask, - Alignment); - // Update offset by 4*4bytes. - offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize)); - } + [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType); + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(), + "helper type should match vectorized matrix"); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); } void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, @@ -8085,6 +8048,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, GEP->eraseFromParent(); } else if (StoreInst *stUser = dyn_cast(subsUser)) { + // Store elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. 
IRBuilder<> stBuilder(stUser); Value *Val = stUser->getValueOperand(); if (Val->getType()->isVectorTy()) { @@ -8108,6 +8074,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, LoadInst *ldUser = cast(subsUser); IRBuilder<> ldBuilder(ldUser); Value *ldData = UndefValue::get(resultType); + // Load elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. if (resultType->isVectorTy()) { for (unsigned i = 0; i < resultSize; i++) { Value *ResultElt; @@ -8248,30 +8217,9 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, LdInst->eraseFromParent(); } else if (StoreInst *StInst = dyn_cast(user)) { // Store of scalar/vector within a struct or structured raw store. - Type *Ty = StInst->getValueOperand()->getType(); - Type *pOverloadTy = Ty->getScalarType(); - Value *offset = baseOffset; - Value *val = StInst->getValueOperand(); - Value *undefVal = llvm::UndefValue::get(pOverloadTy); - Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; - uint8_t mask = 0; - if (Ty->isVectorTy()) { - unsigned vectorNumElements = Ty->getVectorNumElements(); - DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); - assert(vectorNumElements <= 4); - for (unsigned i = 0; i < vectorNumElements; i++) { - vals[i] = Builder.CreateExtractElement(val, i); - mask |= (1 << i); - } - } else { - vals[0] = val; - mask = DXIL::kCompMask_X; - } - Constant *alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder, vals, - mask, alignment); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); StInst->eraseFromParent(); } else if (BitCastInst *BCI = dyn_cast(user)) { // Recurse users @@ -8418,14 +8366,15 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, User *user = *(It++); Instruction *I = cast(user); IRBuilder<> Builder(I); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); if (LoadInst *ldInst = dyn_cast(user)) { TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout); } else if (StoreInst *stInst = dyn_cast(user)) { Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP); // delete the st stInst->eraseFromParent(); } else if (GetElementPtrInst *GEP = dyn_cast(user)) { @@ -8450,9 +8399,10 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate St. // Reset insert point, UpdateVectorElt may move SI to different block. 
StBuilder.SetInsertPoint(SI); - TranslateStore(RK, handle, ldVal, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - StBuilder, hlslOP); + TranslateStore( + RK, handle, ldVal, + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI, + StBuilder, hlslOP); SI->eraseFromParent(); continue; } @@ -8642,9 +8592,10 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, } else { StoreInst *stInst = cast(*U); Value *val = stInst->getValueOperand(); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP, mipLevel); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP, mipLevel); stInst->eraseFromParent(); } Translated = true; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl new file mode 100644 index 0000000000..fa070ceca5 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl @@ -0,0 +1,192 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer store lowering + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); +RWStructuredBuffer< float[2] > ArrBuf : register(u3); +RWStructuredBuffer< Vector > SVecBuf : register(u4); +RWStructuredBuffer< float2x2 > MatBuf : register(u5); +RWStructuredBuffer< Matrix > SMatBuf : register(u6); + +ConsumeStructuredBuffer< float2 > CVecBuf : register(u7); +ConsumeStructuredBuffer< float[2] > CArrBuf : register(u8); +ConsumeStructuredBuffer< Vector > CSVecBuf : register(u9); +ConsumeStructuredBuffer< float2x2 > CMatBuf : register(u10); +ConsumeStructuredBuffer< Matrix > CSMatBuf : register(u11); + +AppendStructuredBuffer< float2 > AVecBuf : register(u12); +AppendStructuredBuffer< float[2] > AArrBuf : register(u13); +AppendStructuredBuffer< Vector > ASVecBuf : register(u14); +AppendStructuredBuffer< float2x2 > AMatBuf : register(u15); +AppendStructuredBuffer< Matrix > ASMatBuf : register(u16); + +void main(uint ix0 : IX0) { + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, 
%dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], <2 x i1> + BabBuf.Store(ix0 + 1, BabBuf.Load< bool2 >(ix0 + 0)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, [2 x float]*)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], [2 x float] + BabBuf.Store(ix0 + 2, BabBuf.Load< float[2] >(ix0 + 1)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Vector\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Vector" + BabBuf.Store >(ix0 + 3, BabBuf.Load< Vector >(ix0 + 2)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 
4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %class.matrix.float.2.2)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], %class.matrix.float.2.2 + BabBuf.Store(ix0 + 4, BabBuf.Load< float2x2 >(ix0 + 3)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Matrix\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Matrix" + BabBuf.Store >(ix0 + 5, BabBuf.Load< Matrix >(ix0 + 4)); + + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]] + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, 
%"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + VecBuf[ix0 + 1] = VecBuf[ix0 + 0]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + ArrBuf[ix0 + 2] = ArrBuf[ix0 + 1]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SVecBuf[ix0 + 3] = SVecBuf[ix0 + 2]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call 
%class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + MatBuf[ix0 + 4] = MatBuf[ix0 + 3]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SMatBuf[ix0 + 5] = SMatBuf[ix0 + 4]; + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call <2 x float> @"dx.hl.op..consume<2 x float> (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, <2 x float>)"(i32 226, %dx.types.Handle 
[[anhdl]], <2 x float> [[cn]]) + AVecBuf.Append(CVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" undef) + // CHECK: [[cn:%.*]] = call [2 x float]* @"dx.hl.op..consume[2 x float]* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, [2 x float]*)"(i32 226, %dx.types.Handle [[anhdl]], [2 x float]* + AArrBuf.Append(CArrBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Vector"* @"dx.hl.op..consume%\22struct.Vector\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Vector\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Vector"* + ASVecBuf.Append(CSVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %class.matrix.float.2.2 @"dx.hl.op..consume%class.matrix.float.2.2 (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %class.matrix.float.2.2)"(i32 226, %dx.types.Handle [[anhdl]], %class.matrix.float.2.2 [[cn]]) + AMatBuf.Append(CMatBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Matrix"* @"dx.hl.op..consume%\22struct.Matrix\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Matrix\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Matrix"* + ASMatBuf.Append(CSMatBuf.Consume()); + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll new file mode 100644 index 0000000000..540ab85819 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll @@ -0,0 +1,822 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.RWStructuredBuffer" = type { [2 x float] } +%"class.RWStructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.RWStructuredBuffer >" = type { %class.matrix.float.2.2 } +%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.RWStructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { <2 x float> } +%"class.ConsumeStructuredBuffer" = type { [2 x float] } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Vector" } +%"class.ConsumeStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Matrix" } +%"class.AppendStructuredBuffer >" = type { <2 x float> } +%"class.AppendStructuredBuffer" = type { [2 x float] } +%"class.AppendStructuredBuffer >" = type { %"struct.Vector" } +%"class.AppendStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.AppendStructuredBuffer >" = type { %"struct.Matrix" } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external global 
%struct.RWByteAddressBuffer, align 4 +@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" = external global %"class.ConsumeStructuredBuffer", align 4 +@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 8 +@"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" = external global %"class.AppendStructuredBuffer", align 4 +@"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 8 +@"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 + +; CHECK-LABEL: define void @main(i32 %ix0) +; Function Attrs: nounwind +define void @main(i32 %ix0) #0 { +bb: + ; CHECK: [[pix:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + + %tmp = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:32 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp) ; line:60 col:32 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:32 + %tmp3 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, 
i32)"(i32 231, %dx.types.Handle %tmp2, i32 %ix0) ; line:60 col:32 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[vec2:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 undef, i32 undef, i8 3, i32 4) + %tmp4 = add i32 %ix0, 1 ; line:60 col:27 + %tmp5 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:3 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp5) ; line:60 col:3 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle %tmp7, i32 %tmp4, <2 x i1> %tmp3) ; line:60 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 4, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp8 = add i32 %ix0, 1 ; line:70 col:63 + %tmp9 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:35 + %tmp10 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp9) ; line:70 col:35 + %tmp11 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp10, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:35 + %tmp12 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp11, i32 %tmp8) ; line:70 col:35 + %tmp13 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 0 ; line:70 col:3 + %tmp14 = load float, float* %tmp13 ; line:70 col:3 + %tmp15 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 1 ; line:70 col:3 + %tmp16 = load float, float* %tmp15 ; line:70 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 
[[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 4 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp17 = add i32 %ix0, 2 ; line:70 col:30 + %tmp18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:3 + %tmp19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp18) ; line:70 col:3 + %tmp20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp17, float %tmp14) ; line:70 col:3 + %tmp21 = add i32 %tmp17, 4 ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp21, float %tmp16) ; line:70 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 24, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> 
[[ping]], float [[val1]], i64 1 + %tmp22 = add i32 %ix0, 2 ; line:80 col:78 + %tmp23 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:43 + %tmp24 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp23) ; line:80 col:43 + %tmp25 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp24, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:43 + %tmp26 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp25, i32 %tmp22) ; line:80 col:43 + %tmp27 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 0 ; line:80 col:3 + %tmp28 = load <4 x float>, <4 x float>* %tmp27 ; line:80 col:3 + %tmp29 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 1 ; line:80 col:3 + %tmp30 = load double, double* %tmp29 ; line:80 col:3 + %tmp31 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 2 ; line:80 col:3 + %tmp32 = load <2 x float>, <2 x float>* %tmp31 ; line:80 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, double [[dval]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 24 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp33 = add i32 %ix0, 3 ; line:80 col:38 + %tmp34 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:3 + %tmp35 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp34) ; line:80 col:3 + %tmp36 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp35, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp33, <4 x float> %tmp28) ; line:80 col:3 + %tmp37 = add i32 %tmp33, 16 ; line:80 col:3 + call void @"dx.hl.op..void (i32, 
%dx.types.Handle, i32, double)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp37, double %tmp30) ; line:80 col:3 + %tmp38 = add i32 %tmp33, 24 ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp38, <2 x float> %tmp32) ; line:80 col:3 + + + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[rvec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp39 = add i32 %ix0, 3 ; line:90 col:63 + %tmp40 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:35 + %tmp41 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp40) ; line:90 col:35 + %tmp42 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp41, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:35 + %tmp43 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp42, i32 %tmp39) ; line:90 col:35 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp44 = add i32 %ix0, 4 ; line:90 col:30 + %row2col = shufflevector <4 x float> %tmp43, <4 x float> %tmp43, <4 x i32> ; line:90 col:3 + %tmp45 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:3 + %tmp46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp45) ; line:90 col:3 + %tmp47 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp46, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp47, i32 %tmp44, <4 x float> %row2col) ; line:90 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp48 = add i32 %ix0, 4 ; line:100 col:82 + %tmp49 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:45 + %tmp50 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp49) ; line:100 col:45 + %tmp51 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp50, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:45 + %tmp52 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp51, i32 %tmp48) ; line:100 col:45 + %tmp53 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 0 ; line:100 col:3 + %tmp54 = load <4 x float>, <4 x float>* %tmp53 ; line:100 col:3 + %tmp55 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 1 ; line:100 col:3 + %tmp56 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> 
(i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp55) ; line:100 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp57 = add i32 %ix0, 5 ; line:100 col:40 + %tmp58 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:3 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp58) ; line:100 col:3 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp57, <4 x float> %tmp54) ; line:100 col:3 + %tmp61 = add i32 %tmp57, 16 ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp61, <4 x float> %tmp56) ; line:100 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp62 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:21 + %tmp63 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp62) ; line:111 col:21 + %tmp64 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp63, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:21 + %tmp65 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp64, i32 %ix0) ; line:111 col:21 + %tmp66 = load <2 x float>, <2 x float>* %tmp65 ; line:111 col:21 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp67 = add i32 %ix0, 1 ; line:111 col:14 + %tmp68 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:3 + %tmp69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp68) ; line:111 col:3 + %tmp70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp69, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:3 + %tmp71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp70, i32 %tmp67) ; line:111 col:3 + store <2 x float> %tmp66, <2 x float>* %tmp71 ; line:111 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 4, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 4, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + %tmp72 = add i32 %ix0, 2 ; line:121 col:14 + %tmp73 = load %"class.RWStructuredBuffer", 
%"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:3 + %tmp74 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp73) ; line:121 col:3 + %tmp75 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp74, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:3 + %tmp76 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp75, i32 %tmp72) ; line:121 col:3 + %tmp77 = add i32 %ix0, 1 ; line:121 col:32 + %tmp78 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:21 + %tmp79 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp78) ; line:121 col:21 + %tmp80 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp79, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:21 + %tmp81 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp80, i32 %tmp77) ; line:121 col:21 + %tmp82 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 0 ; line:121 col:21 + %tmp83 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 0 ; line:121 col:21 + %tmp84 = load float, float* %tmp83 ; line:121 col:21 + store float %tmp84, float* %tmp82 ; line:121 col:21 + %tmp85 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 1 ; line:121 col:21 + %tmp86 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 1 ; line:121 col:21 + %tmp87 = load float, float* %tmp86 ; line:121 col:21 + store float %tmp87, float* %tmp85 ; line:121 col:21 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: 
[[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, double [[val0]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp88 = add i32 %ix0, 3 ; line:131 col:15 + %tmp89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:3 + %tmp90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp89) ; line:131 col:3 + %tmp91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp90, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:3 + %tmp92 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp91, i32 %tmp88) ; line:131 col:3 + %tmp93 = add i32 %ix0, 2 ; line:131 col:34 + %tmp94 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:22 + %tmp95 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp94) ; line:131 col:22 + %tmp96 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp95, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:22 + %tmp97 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp96, i32 %tmp93) ; line:131 col:22 + %tmp98 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 0 ; line:131 col:22 + %tmp99 = getelementptr inbounds 
%"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 0 ; line:131 col:22 + %tmp100 = load <4 x float>, <4 x float>* %tmp99 ; line:131 col:22 + store <4 x float> %tmp100, <4 x float>* %tmp98 ; line:131 col:22 + %tmp101 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 1 ; line:131 col:22 + %tmp102 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 1 ; line:131 col:22 + %tmp103 = load double, double* %tmp102 ; line:131 col:22 + store double %tmp103, double* %tmp101 ; line:131 col:22 + %tmp104 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 2 ; line:131 col:22 + %tmp105 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 2 ; line:131 col:22 + %tmp106 = load <2 x float>, <2 x float>* %tmp105 ; line:131 col:22 + store <2 x float> %tmp106, <2 x float>* %tmp104 ; line:131 col:22 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp107 = add i32 %ix0, 4 ; line:141 col:14 + %tmp108 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:3 + %tmp109 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp108) ; line:141 col:3 + %tmp110 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp109, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:3 + %tmp111 = call %class.matrix.float.2.2* 
@"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp110, i32 %tmp107) ; line:141 col:3 + %tmp112 = add i32 %ix0, 3 ; line:141 col:32 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:21 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:141 col:21 + %tmp115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:21 + %tmp116 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 %tmp112) ; line:141 col:21 + %tmp117 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp116) ; line:141 col:21 + %tmp118 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp111, <4 x float> %tmp117) ; line:141 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + 
; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp119 = add i32 %ix0, 5 ; line:151 col:15 + %tmp120 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:3 + %tmp121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp120) ; line:151 col:3 + %tmp122 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp121, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:3 + %tmp123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp122, i32 %tmp119) ; line:151 col:3 + %tmp124 = add i32 %ix0, 4 ; line:151 col:34 + %tmp125 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:22 + %tmp126 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp125) ; line:151 col:22 + %tmp127 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp126, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:22 + %tmp128 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp127, i32 %tmp124) ; line:151 col:22 + %tmp129 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 0 ; line:151 col:22 + %tmp130 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 0 ; line:151 col:22 + %tmp131 = load <4 x float>, <4 x float>* %tmp130 ; line:151 col:22 + store <4 x float> %tmp131, <4 x float>* %tmp129 ; line:151 col:22 + %tmp132 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 1 ; line:151 col:22 + %tmp133 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 1 ; line:151 col:22 + %tmp134 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp133) ; line:151 col:22 + %tmp135 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp132, <4 
x float> %tmp134) ; line:151 col:22 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp136 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:18 + %tmp137 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp136) ; line:159 col:18 + %tmp138 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp137, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:159 col:18 + %tmp139 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp138) #0 ; line:159 col:18 + %tmp140 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp138, i32 %tmp139) #0 ; line:159 col:18 + %tmp141 = load <2 x float>, <2 x float>* %tmp140 ; line:159 col:18 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp142 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp142) ; line:159 col:3 + %tmp144 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp143, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:159 col:3 + %tmp145 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp144) #0 ; line:159 col:3 + %tmp146 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, 
%dx.types.Handle %tmp144, i32 %tmp145) #0 ; line:159 col:3 + store <2 x float> %tmp141, <2 x float>* %tmp146 ; line:159 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer"(i32 160, %"class.ConsumeStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp147 = load %"class.ConsumeStructuredBuffer", %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" ; line:167 col:18 + %tmp148 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" %tmp147) ; line:167 col:18 + %tmp149 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp148, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" zeroinitializer) ; line:167 col:18 + %tmp150 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp149) #0 ; line:167 col:18 + %tmp151 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp149, i32 %tmp150) #0 ; line:167 col:18 + %tmp152 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 0 ; line:167 col:3 + %tmp153 = load float, float* %tmp152 ; line:167 col:3 + %tmp154 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 1 ; line:167 col:3 + %tmp155 = load float, float* %tmp154 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer"(i32 160, %"class.AppendStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp156 = load %"class.AppendStructuredBuffer", %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" ; line:167 col:3 + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" %tmp156) ; line:167 col:3 + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4108, i32 
8 }, %"class.AppendStructuredBuffer" zeroinitializer) ; line:167 col:3 + %tmp159 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp158) #0 ; line:167 col:3 + %tmp160 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp158, i32 %tmp159) #0 ; line:167 col:3 + %tmp161 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 0 ; line:167 col:3 + store float %tmp153, float* %tmp161 ; line:167 col:3 + %tmp162 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 1 ; line:167 col:3 + store float %tmp155, float* %tmp162 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 1, i32 8) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + + %tmp163 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:19 + %tmp164 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp163) ; line:175 col:19 + %tmp165 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp164, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:175 col:19 + %tmp166 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp165) #0 ; line:175 col:19 + %tmp167 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp165, i32 %tmp166) #0 ; line:175 col:19 + %tmp168 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* 
%tmp167, i32 0, i32 0 ; line:175 col:3 + %tmp169 = load <4 x float>, <4 x float>* %tmp168 ; line:175 col:3 + %tmp170 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 1 ; line:175 col:3 + %tmp171 = load double, double* %tmp170 ; line:175 col:3 + %tmp172 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 2 ; line:175 col:3 + %tmp173 = load <2 x float>, <2 x float>* %tmp172 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, double [[dval]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp174 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:3 + %tmp175 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp174) ; line:175 col:3 + %tmp176 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp175, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:175 col:3 + %tmp177 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp176) #0 ; line:175 col:3 + %tmp178 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp176, i32 %tmp177) #0 ; line:175 col:3 + %tmp179 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 0 ; line:175 col:3 + store <4 x float> %tmp169, <4 x float>* %tmp179 ; line:175 col:3 + %tmp180 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 1 ; line:175 col:3 + store double %tmp171, double* %tmp180 ; line:175 col:3 + %tmp181 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 2 ; line:175 col:3 + store <2 x float> %tmp173, <2 x float>* %tmp181 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], 
%dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[rvec4:%.*]] = shufflevector <4 x float> [[vec4]], <4 x float> [[vec4]], <4 x i32> + %tmp182 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:18 + %tmp183 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp182) ; line:183 col:18 + %tmp184 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp183, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:183 col:18 + %tmp185 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp184) #0 ; line:183 col:18 + %tmp186 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp184, i32 %tmp185) #0 ; line:183 col:18 + %tmp187 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp186) ; line:183 col:18 + %col2row10 = shufflevector <4 x float> %tmp187, <4 x float> %tmp187, <4 x i32> ; line:183 col:18 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp188 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:3 + %tmp189 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp188) ; 
line:183 col:3 + %tmp190 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp189, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:183 col:3 + %tmp191 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp190) #0 ; line:183 col:3 + %tmp192 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp190, i32 %tmp191) #0 ; line:183 col:3 + %row2col11 = shufflevector <4 x float> %col2row10, <4 x float> %col2row10, <4 x i32> ; line:183 col:3 + call void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp192, <4 x float> %row2col11) ; line:183 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp193 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:19 + %tmp194 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp193) ; line:191 col:19 + %tmp195 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp194, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:191 col:19 + %tmp196 = call 
i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp195) #0 ; line:191 col:19 + %tmp197 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp195, i32 %tmp196) #0 ; line:191 col:19 + %tmp198 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp197, i32 0, i32 0 ; line:191 col:3 + %tmp199 = load <4 x float>, <4 x float>* %tmp198 ; line:191 col:3 + %tmp200 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp197, i32 0, i32 1 ; line:191 col:3 + %tmp201 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp200) ; line:191 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp202 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:3 + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp202) ; line:191 col:3 + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:191 col:3 + %tmp205 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp204) #0 ; line:191 col:3 + %tmp206 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp204, i32 %tmp205) #0 ; line:191 col:3 + %tmp207 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 0 ; line:191 col:3 + store <4 x float> %tmp199, <4 x float>* %tmp207 ; line:191 col:3 + %tmp208 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 1 ; line:191 col:3 + %tmp209 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp208, <4 x float> %tmp201) ; line:191 col:3 + + + ; CHECK: ret void + ret void ; line:193 col:1 +} + +declare void 
@"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32, %dx.types.Handle, i32, <2 x i1>) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32, %"class.RWStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer") #1 +declare %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare i32 @"dx.hl.op..i32 
(i32, %dx.types.Handle)"(i32, %dx.types.Handle) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32, %"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32, %dx.types.Handle, i32, <4 x float>) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, double)"(i32, %dx.types.Handle, i32, double) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32, %class.matrix.float.2.2*) #2 +declare <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 +declare void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} +!dx.entryPoints = !{!50} +!dx.fnprops = !{!72} +!dx.options = !{!73, !74} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.RWStructuredBuffer" undef, !12, %"class.RWStructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.RWStructuredBuffer >" undef, !29, %"class.RWStructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39, %"class.ConsumeStructuredBuffer >" undef, !7, %"class.ConsumeStructuredBuffer" undef, !12, %"class.ConsumeStructuredBuffer >" undef, !16, %"class.ConsumeStructuredBuffer >" undef, !29, %"class.ConsumeStructuredBuffer >" undef, !35, %"class.AppendStructuredBuffer >" undef, !7, %"class.AppendStructuredBuffer" undef, !12, %"class.AppendStructuredBuffer >" undef, !16, %"class.AppendStructuredBuffer >" undef, !29, %"class.AppendStructuredBuffer >" undef, !35} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 = !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} 
+!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{null, !52, null, null} +!52 = !{!53, !54, !56, !57, !59, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71} +!53 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!54 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!55 = !{i32 1, i32 8} +!56 = !{i32 2, %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!57 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!58 = !{i32 1, i32 32} +!59 = !{i32 4, %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!60 = !{i32 1, i32 16} +!61 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!62 = !{i32 6, %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A", !"CVecBuf", i32 0, i32 7, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!63 = !{i32 7, %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A", !"CArrBuf", i32 0, i32 8, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!64 = !{i32 8, %"class.ConsumeStructuredBuffer >"* @"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A", !"CSVecBuf", i32 0, i32 9, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!65 = !{i32 9, %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A", !"CMatBuf", i32 0, i32 10, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!66 = !{i32 10, %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"CSMatBuf", i32 0, i32 11, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!67 = !{i32 11, %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A", !"AVecBuf", i32 0, i32 12, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!68 = !{i32 12, %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A", !"AArrBuf", i32 0, i32 13, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!69 = !{i32 13, %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A", !"ASVecBuf", i32 0, i32 14, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!70 = !{i32 14, %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A", !"AMatBuf", i32 0, i32 15, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!71 = !{i32 15, %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"ASMatBuf", i32 0, 
i32 16, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!72 = !{void (i32)* @main, i32 1} +!73 = !{i32 64} +!74 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl new file mode 100644 index 0000000000..9ff6039127 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl @@ -0,0 +1,404 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer store lowering +// Focuses on converted types in addition to common float type. + +RWBuffer FTyBuf; +RWBuffer BTyBuf; +RWBuffer LTyBuf; +RWBuffer DTyBuf; + +RWTexture1D FTex1d; +RWTexture1D BTex1d; +RWTexture1D LTex1d; +RWTexture1D DTex1d; + +RWTexture2D FTex2d; +RWTexture2D BTex2d; +RWTexture2D LTex2d; +RWTexture2D DTex2d; + +RWTexture3D FTex3d; +RWTexture3D BTex3d; +RWTexture3D LTex3d; +RWTexture3D DTex3d; + +RWTexture2DMS FTex2dMs; +RWTexture2DMS BTex2dMs; +RWTexture2DMS LTex2dMs; +RWTexture2DMS DTex2dMs; + +// CHECK: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { + + // CHECK-DAG: [[ix3adr:%.*]] = alloca <3 x i32>, align 4 + // CHECK-DAG: [[ix2adr:%.*]] = alloca <2 x i32>, align 4 + // CHECK-DAG: [[ix1adr:%.*]] = alloca i32, align 4 + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTyBuf[ix1 + 1] = FTyBuf[ix1 + 0]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, 
i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTyBuf[ix1 + 3] = BTyBuf[ix1 + 2]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTyBuf[ix1 + 5] = LTyBuf[ix1 + 4]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: 
[[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTyBuf[ix1 + 7] = DTyBuf[ix1 + 6]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex1d[ix1 + 9] = FTex1d[ix1 + 8]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, 
i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex1d[ix1 + 11] = BTex1d[ix1 + 10]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex1d[ix1 + 13] = LTex1d[ix1 + 12]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex1d[ix1 + 15] = DTex1d[ix1 + 14]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, 
%"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2d[ix2 + 17] = FTex2d[ix2 + 16]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2d[ix2 + 19] = BTex2d[ix2 + 18]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2d[ix2 + 21] = LTex2d[ix2 + 20]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2d[ix2 + 23] = DTex2d[ix2 + 22]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // 
CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex3d[ix3 + 25] = FTex3d[ix3 + 24]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex3d[ix3 + 27] = BTex3d[ix3 + 26]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // 
CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex3d[ix3 + 29] = LTex3d[ix3 + 28]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex3d[ix3 + 31] = DTex3d[ix3 + 30]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 
0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs[ix2 + 33] = FTex2dMs[ix2 + 32]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2dMs[ix2 + 35] = BTex2dMs[ix2 + 34]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, 
%"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs[ix2 + 37] = LTex2dMs[ix2 + 36]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs[ix2 + 39] = DTex2dMs[ix2 + 38]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: 
[[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs.sample[ix1 + 1][ix2 + 41] = FTex2dMs.sample[ix1 + 0][ix2 + 40]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2dMs.sample[ix1 + 3][ix2 + 43] = BTex2dMs.sample[ix1 + 2][ix2 + 42]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 
0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs.sample[ix1 + 5][ix2 + 45] = LTex2dMs.sample[ix1 + 4][ix2 + 44]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs.sample[ix1 + 7][ix2 + 47] = DTex2dMs.sample[ix1 + 6][ix2 + 46]; + + // CHECK: ret void + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll new file mode 100644 index 0000000000..ac5c6182e1 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll @@ -0,0 +1,1079 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <3 x float> } +%"class.RWBuffer >" = type { <2 x i32> } +%"class.RWBuffer >" = type { <2 x i64> } +%"class.RWBuffer" = type { double } +%"class.RWTexture1D >" = type { <3 x float> } +%"class.RWTexture1D >" = type { <2 x i32> } +%"class.RWTexture1D >" = type { <2 x i64> } +%"class.RWTexture1D" = type { double } +%"class.RWTexture2D 
>" = type { <3 x float> } +%"class.RWTexture2D >" = type { <2 x i32> } +%"class.RWTexture2D >" = type { <2 x i64> } +%"class.RWTexture2D" = type { double } +%"class.RWTexture3D >" = type { <3 x float> } +%"class.RWTexture3D >" = type { <2 x i32> } +%"class.RWTexture3D >" = type { <2 x i64> } +%"class.RWTexture3D" = type { double } +%"class.RWTexture2DMS, 0>" = type { <3 x float>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i32>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i64>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS" = type { double, %"class.RWTexture2DMS::sample_type" } +%"class.RWTexture2DMS::sample_type" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" = external global %"class.RWBuffer >", align 8 +@"\01?DTyBuf@@3V?$RWBuffer@N@@A" = external global %"class.RWBuffer", align 8 +@"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture1D >", align 8 +@"\01?DTex1d@@3V?$RWTexture1D@N@@A" = external global %"class.RWTexture1D", align 8 +@"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" = external global %"class.RWTexture2D >", align 4 +@"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture2D >", align 4 +@"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture2D >", align 8 +@"\01?DTex2d@@3V?$RWTexture2D@N@@A" = external global %"class.RWTexture2D", align 8 +@"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture3D >", align 8 +@"\01?DTex3d@@3V?$RWTexture3D@N@@A" = external global %"class.RWTexture3D", align 8 +@"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 8 +@"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" = external global %"class.RWTexture2DMS", align 8 + +; Function Attrs: nounwind +; CHECK-LABEL: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) #0 { +bb: + ; CHECK: [[ix3_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> undef, i32 [[ix3_0]], i64 0 + ; CHECK: [[ix3_1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1, i32 undef) + ; CHECK: [[vec3:%.*]] = insertelement <3 x i32> [[ix3]], i32 [[ix3_1]], i64 1 + ; CHECK: [[ix3_2:%.*]] = call i32 
@dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 2, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> [[vec3]], i32 [[ix3_2]], i64 2 + ; CHECK: [[ix2_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef) + ; CHECK: [[vec2:%.*]] = insertelement <2 x i32> undef, i32 [[ix2_0]], i64 0 + ; CHECK: [[ix2_1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef) + ; CHECK: [[ix2:%.*]] = insertelement <2 x i32> [[vec2]], i32 [[ix2_1]], i64 1 + ; CHECK: [[ix1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp) + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp3 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 %ix1) + %tmp4 = load <3 x float>, <3 x float>* %tmp3 + %tmp5 = add i32 %ix1, 1 + %tmp6 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp7 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp6) + %tmp8 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp9 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, 
i32)"(i32 0, %dx.types.Handle %tmp8, i32 %tmp5) + store <3 x float> %tmp4, <3 x float>* %tmp9 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp10 = add i32 %ix1, 2 + %tmp11 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp11) + %tmp13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp12, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp14 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp13, i32 %tmp10) + %tmp15 = load <2 x i32>, <2 x i32>* %tmp14 + %tmp16 = icmp ne <2 x i32> %tmp15, zeroinitializer + %tmp17 = add i32 %ix1, 3 + %tmp18 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp18) + %tmp20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp21 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp20, i32 %tmp17) + %tmp22 = zext <2 x i1> %tmp16 to <2 x i32> + store <2 x i32> %tmp22, <2 x i32>* %tmp21 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp23 = add i32 %ix1, 4 + %tmp24 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp25 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp24) + %tmp26 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp25, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp27 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp26, i32 %tmp23) + %tmp28 = load <2 x i64>, <2 x i64>* %tmp27 + %tmp29 = add i32 %ix1, 5 + %tmp30 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp30) + %tmp32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp31, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp33 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp32, i32 %tmp29) + store <2 x i64> %tmp28, <2 x i64>* %tmp33 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp34 = add i32 %ix1, 6 + %tmp35 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp35) + %tmp37 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp36, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp38 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp37, i32 %tmp34) + %tmp39 = load double, double* %tmp38 + %tmp40 = add i32 %ix1, 7 + %tmp41 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp41) + %tmp43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp42, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp44 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp43, i32 %tmp40) + store double %tmp39, double* %tmp44 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = 
insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp45 = add i32 %ix1, 8 + %tmp46 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp46) + %tmp48 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp47, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp49 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp48, i32 %tmp45) + %tmp50 = load <3 x float>, <3 x float>* %tmp49 + %tmp51 = add i32 %ix1, 9 + %tmp52 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp52) + %tmp54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp55 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 %tmp51) + store <3 x float> %tmp50, <3 x float>* %tmp55 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp56 = add i32 %ix1, 10 + %tmp57 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp57) + %tmp59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp58, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp60 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp59, i32 %tmp56) + %tmp61 = load <2 x i32>, <2 x i32>* %tmp60 + %tmp62 = icmp ne <2 x i32> %tmp61, zeroinitializer + %tmp63 = add i32 %ix1, 11 + %tmp64 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp65 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp64) + %tmp66 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp65, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp67 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp66, i32 %tmp63) + %tmp68 = zext <2 x i1> %tmp62 to <2 x i32> + store <2 x i32> %tmp68, <2 x i32>* %tmp67 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + ; 
CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp69 = add i32 %ix1, 12 + %tmp70 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp70) + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp73 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 %tmp69) + %tmp74 = load <2 x i64>, <2 x i64>* %tmp73 + %tmp75 = add i32 %ix1, 13 + %tmp76 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp77 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp76) + %tmp78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp77, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp79 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp78, i32 %tmp75) + store <2 x i64> %tmp74, <2 x i64>* %tmp79 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 
}) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp80 = add i32 %ix1, 14 + %tmp81 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp82 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" %tmp81) + %tmp83 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp82, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp84 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp83, i32 %tmp80) + %tmp85 = load double, double* %tmp84 + %tmp86 = add i32 %ix1, 15 + %tmp87 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp88 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" %tmp87) + %tmp89 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp88, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp90 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp89, i32 %tmp86) + store double %tmp85, double* %tmp90 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x 
float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp91 = add <2 x i32> %ix2, + %tmp92 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp93 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp92) + %tmp94 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp93, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp95 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp94, <2 x i32> %tmp91) + %tmp96 = load <3 x float>, <3 x float>* %tmp95 + %tmp97 = add <2 x i32> %ix2, + %tmp98 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp99 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp98) + %tmp100 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp99, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp101 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp100, <2 x i32> %tmp97) + store <3 x float> %tmp96, <3 x float>* %tmp101 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = 
extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp102 = add <2 x i32> %ix2, + %tmp103 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp104 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp103) + %tmp105 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp104, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp106 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp105, <2 x i32> %tmp102) + %tmp107 = load <2 x i32>, <2 x i32>* %tmp106 + %tmp108 = icmp ne <2 x i32> %tmp107, zeroinitializer + %tmp109 = add <2 x i32> %ix2, + %tmp110 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp111 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp110) + %tmp112 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp111, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp113 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp112, <2 x i32> %tmp109) + %tmp114 = zext <2 x i1> %tmp108 to <2 x i32> + store <2 x i32> %tmp114, <2 x i32>* %tmp113 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp115 = add <2 x i32> %ix2, + %tmp116 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp117 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp116) + %tmp118 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp117, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp119 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp118, <2 x i32> %tmp115) + %tmp120 = load <2 x i64>, <2 x i64>* %tmp119 + %tmp121 = add <2 x i32> %ix2, + %tmp122 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp123 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp122) + %tmp124 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp123, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp125 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp124, <2 x i32> %tmp121) + store <2 x i64> %tmp120, <2 x i64>* %tmp125 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x 
i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp126 = add <2 x i32> %ix2, + %tmp127 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp128 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp127) + %tmp129 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp128, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp130 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp129, <2 x i32> %tmp126) + %tmp131 = load double, double* %tmp130 + %tmp132 = add <2 x i32> %ix2, + %tmp133 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp133) + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp134, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp136 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp135, <2 x i32> %tmp132) + store double %tmp131, double* %tmp136 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = 
add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp137 = add <3 x i32> %ix3, + %tmp138 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp139 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp138) + %tmp140 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp139, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp141 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp140, <3 x i32> %tmp137) + %tmp142 = load <3 x float>, <3 x float>* %tmp141 + %tmp143 = add <3 x i32> %ix3, + %tmp144 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp145 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp144) + %tmp146 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp145, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp147 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp146, <3 x i32> %tmp143) + store <3 x float> %tmp142, <3 x float>* %tmp147 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 
[[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp148 = add <3 x i32> %ix3, + %tmp149 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp150 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp149) + %tmp151 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp150, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp152 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp151, <3 x i32> %tmp148) + %tmp153 = load <2 x i32>, <2 x i32>* %tmp152 + %tmp154 = icmp ne <2 x i32> %tmp153, zeroinitializer + %tmp155 = add <3 x i32> %ix3, + %tmp156 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp156) + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp159 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp158, <3 x i32> %tmp155) + %tmp160 = zext <2 x i1> %tmp154 to <2 x i32> + store <2 x i32> %tmp160, <2 x i32>* %tmp159 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = 
extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp161 = add <3 x i32> %ix3, + %tmp162 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp163 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp162) + %tmp164 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp163, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp165 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp164, <3 x i32> %tmp161) + %tmp166 = load <2 x i64>, <2 x i64>* %tmp165 + %tmp167 = add <3 x i32> %ix3, + %tmp168 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp169 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp168) + %tmp170 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp169, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp171 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp170, <3 x i32> %tmp167) + store <2 x i64> %tmp166, <2 x i64>* %tmp171 + + ; CHECK: 
[[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp172 = add <3 x i32> %ix3, + %tmp173 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp174 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp173) + %tmp175 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp174, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp176 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp175, <3 x i32> %tmp172) + %tmp177 = load double, double* %tmp176 + %tmp178 = add <3 x i32> %ix3, + %tmp179 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp180 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp179) + %tmp181 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp180, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp182 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp181, <3 x i32> %tmp178) + store double %tmp177, double* %tmp182 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, 
%"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 0) + %tmp183 = add <2 x i32> %ix2, + %tmp184 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp185 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp184) + %tmp186 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp185, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp187 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp186, <2 x i32> %tmp183) + %tmp188 = load <3 x float>, <3 x float>* %tmp187 + %tmp189 = add <2 x i32> %ix2, + %tmp190 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp191 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp190) + %tmp192 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp191, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp193 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp192, <2 x i32> %tmp189) + store <3 x float> %tmp188, <3 x float>* %tmp193 + + ; 
CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15, i32 0) + %tmp194 = add <2 x i32> %ix2, + %tmp195 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp196 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp195) + %tmp197 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp196, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp198 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp197, <2 x i32> %tmp194) + %tmp199 = load <2 x i32>, <2 x i32>* %tmp198 + %tmp200 = icmp ne <2 x i32> %tmp199, zeroinitializer + %tmp201 = add <2 x i32> %ix2, + %tmp202 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp202) + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp205 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle 
%tmp204, <2 x i32> %tmp201) + %tmp206 = zext <2 x i1> %tmp200 to <2 x i32> + store <2 x i32> %tmp206, <2 x i32>* %tmp205 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15, i32 0) + + %tmp207 = add <2 x i32> %ix2, + %tmp208 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp209 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp208) + %tmp210 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp209, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp211 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle 
%tmp210, <2 x i32> %tmp207) + %tmp212 = load <2 x i64>, <2 x i64>* %tmp211 + %tmp213 = add <2 x i32> %ix2, + %tmp214 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp215 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp214) + %tmp216 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp215, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp217 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp216, <2 x i32> %tmp213) + store <2 x i64> %tmp212, <2 x i64>* %tmp217 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15, i32 0) + + %tmp218 = add <2 x i32> %ix2, + %tmp219 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp220 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp219) + %tmp221 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp220, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp222 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp221, <2 x i32> %tmp218) + %tmp223 = load double, double* %tmp222 + %tmp224 = add <2 x i32> %ix2, + %tmp225 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* 
@"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp226 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp225) + %tmp227 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp226, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp228 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp227, <2 x i32> %tmp224) + store double %tmp223, double* %tmp228 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle %388, i32 %389, i32 %390, i32 undef, float %392, float %393, float %394, float %391, i8 15, i32 %tmp235) + %tmp229 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp230 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp229) + %tmp231 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp230, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp232 = add <2 x i32> %ix2, + %tmp233 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp231, <2 x i32> %tmp232, i32 %ix1) + %tmp234 = 
load <3 x float>, <3 x float>* %tmp233 + %tmp235 = add i32 %ix1, 1 + %tmp236 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp237 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp236) + %tmp238 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp237, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp239 = add <2 x i32> %ix2, + %tmp240 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp238, <2 x i32> %tmp239, i32 %tmp235) + store <3 x float> %tmp234, <3 x float>* %tmp240 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: %tmp248 = icmp ne <2 x i32> %402, zeroinitializer + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: %407 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %408 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %409 = extractelement <2 x i32> %tmp255, i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %404, i32 %405, i32 %406, i32 undef, i32 %408, i32 %409, i32 %407, i32 %407, i8 15, i32 %tmp249) + ; CHECK: %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + %tmp241 = add i32 %ix1, 2 + %tmp242 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp243 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp242) + %tmp244 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp243, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp245 = add <2 x i32> %ix2, + %tmp246 = call <2 
x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp244, <2 x i32> %tmp245, i32 %tmp241) + %tmp247 = load <2 x i32>, <2 x i32>* %tmp246 + %tmp248 = icmp ne <2 x i32> %tmp247, zeroinitializer + %tmp249 = add i32 %ix1, 3 + %tmp250 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp251 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp250) + %tmp252 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp251, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp253 = add <2 x i32> %ix2, + %tmp254 = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp252, <2 x i32> %tmp253, i32 %tmp249) + %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + store <2 x i32> %tmp255, <2 x i32>* %tmp254 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] 
= trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15, i32 [[sax]]) + %tmp256 = add i32 %ix1, 4 + %tmp257 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp258 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp257) + %tmp259 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp258, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp260 = add <2 x i32> %ix2, + %tmp261 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp259, <2 x i32> %tmp260, i32 %tmp256) + %tmp262 = load <2 x i64>, <2 x i64>* %tmp261 + %tmp263 = add i32 %ix1, 5 + %tmp264 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp265 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp264) + %tmp266 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp265, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp267 = add <2 x i32> %ix2, + %tmp268 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp266, <2 x i32> %tmp267, i32 %tmp263) + store <2 x i64> %tmp262, <2 x i64>* %tmp268 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: %447 = call double @dx.op.makeDouble.f64(i32 101, i32 %445, i32 %446) + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; 
CHECK: %452 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %447) + ; CHECK: %453 = extractvalue %dx.types.splitdouble %452, 0 + ; CHECK: %454 = extractvalue %dx.types.splitdouble %452, 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %449, i32 %450, i32 %451, i32 undef, i32 %453, i32 %454, i32 %453, i32 %454, i8 15, i32 %tmp276) + %tmp269 = add i32 %ix1, 6 + %tmp270 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp271 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp270) + %tmp272 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp271, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp273 = add <2 x i32> %ix2, + %tmp274 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp272, <2 x i32> %tmp273, i32 %tmp269) + %tmp275 = load double, double* %tmp274 + %tmp276 = add i32 %ix1, 7 + %tmp277 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp278 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp277) + %tmp279 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp278, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp280 = add <2 x i32> %ix2, + %tmp281 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp279, <2 x i32> %tmp280, i32 %tmp276) + store double %tmp275, double* %tmp281 + + + ; CHECK: ret void + ret void +} + + +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 
+declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32, %"class.RWTexture1D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32, %"class.RWTexture2D") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32, %"class.RWTexture2DMS") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWTexture2DMS\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS") #1 +declare <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!19} +!dx.fnprops = !{!44} +!dx.options = !{!45, !46} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{void (i32, <2 x i32>, <3 x i32>)* @main, !"main", null, !20, null} +!20 = !{null, !21, null, null} +!21 = !{!22, !24, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43} +!22 = !{i32 0, %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A", !"FTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !23} +!23 = !{i32 0, i32 9} +!24 = !{i32 1, %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"BTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!25 = !{i32 0, i32 5} +!26 = !{i32 2, %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A", !"LTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!27 = !{i32 3, %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A", !"DTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!28 = !{i32 4, %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A", !"FTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !23} +!29 = !{i32 5, %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A", !"BTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!30 = !{i32 6, %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A", !"LTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!31 = !{i32 7, %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A", !"DTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!32 = !{i32 8, %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A", !"FTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !23} +!33 = !{i32 9, %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A", !"BTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!34 = !{i32 10, %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A", !"LTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!35 = !{i32 11, %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A", !"DTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 
false, !25}
+!36 = !{i32 12, %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A", !"FTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !23}
+!37 = !{i32 13, %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A", !"BTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25}
+!38 = !{i32 14, %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A", !"LTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25}
+!39 = !{i32 15, %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A", !"DTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25}
+!40 = !{i32 16, %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A", !"FTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !23}
+!41 = !{i32 17, %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A", !"BTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25}
+!42 = !{i32 18, %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A", !"LTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25}
+!43 = !{i32 19, %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A", !"DTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25}
+!44 = !{void (i32, <2 x i32>, <3 x i32>)* @main, i32 1}
+!45 = !{i32 64}
+!46 = !{i32 -1}

From c5f62d93c18ab5aa4ad6c5fa5288d3f445aa1f03 Mon Sep 17 00:00:00 2001
From: Simon Moll
Date: Tue, 25 Mar 2025 17:19:26 +0100
Subject: [PATCH 46/88] [SER] Patch 1: HitObject type lowering and SM 6.9 enablement (#7097)

Reduction of the complete SER implementation to just the HitObject type
and its default constructor. This has most of the infrastructure changes
in DXC to support SER, e.g., static member functions for builtins and the
HitObject scalar type.
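
As a quick illustration of the surface this patch enables, here is a minimal
HLSL sketch (illustrative only: the real coverage is in the
hitobject_make.hlsl and maybereorder.hlsl tests added below, and the
MaybeReorderThread overload shown here is assumed from the SER specification
rather than spelled out in this diff):

    // Built as lib_6_9; raygeneration is one of the stages where
    // dx::HitObject is permitted.
    [shader("raygeneration")]
    void RayGenMain() {
      // Default construction lowers through MOP_DxHitObject_MakeNop.
      dx::HitObject hit;
      // Assumed single-HitObject overload, per hlsl-specs PR 277.
      dx::MaybeReorderThread(hit);
    }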
Specification PR: https://github.com/microsoft/hlsl-specs/pull/277 --- include/dxc/DXIL/DxilUtil.h | 2 + include/dxc/HlslIntrinsicOp.h | 4 +- include/dxc/dxcapi.internal.h | 5 +- lib/DXIL/DxilUtil.cpp | 21 + lib/HLSL/HLOperationLower.cpp | 24 + tools/clang/include/clang/AST/HlslTypes.h | 3 + tools/clang/include/clang/Basic/Attr.td | 8 + .../clang/Basic/DiagnosticSemaKinds.td | 9 + tools/clang/lib/AST/ASTContextHLSL.cpp | 52 +- tools/clang/lib/AST/HlslTypes.cpp | 4 + tools/clang/lib/CodeGen/CGHLSLMS.cpp | 8 +- tools/clang/lib/CodeGen/CodeGenTypes.cpp | 11 +- tools/clang/lib/Sema/SemaExpr.cpp | 11 +- tools/clang/lib/Sema/SemaHLSL.cpp | 443 +++++++++++++----- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 91 +++- tools/clang/lib/Sema/SemaOverload.cpp | 8 +- .../DXC/Passes/DxilGen/hitobject_dxilgen.ll | 101 ++++ .../Passes/DxilGen/maybereorder_dxilgen.ll | 106 +++++ .../objects/HitObject/hitobject_make.hlsl | 12 + .../objects/HitObject/hitobject_make_ast.hlsl | 24 + .../hlsl/objects/HitObject/maybereorder.hlsl | 13 + .../objects/HitObject/maybereorder_ast.hlsl | 28 ++ .../intrinsics/reorder/hitobject_reorder.hlsl | 10 + .../reorder/reorder-entry-errors.hlsl | 62 +++ .../reorder/reorder-unavailable-pre-sm69.hlsl | 9 + .../hlsl/namespace/dx-namespace-pre-sm69.hlsl | 8 + .../HitObject/hitobject-entry-errors.hlsl | 58 +++ .../HitObject/hitobject-in-buffer.hlsl | 4 + .../hitobject-unavailable-pre-sm69.hlsl | 11 + .../HitObject/hitobject-unsupported-vs.hlsl | 8 + .../HitObject/hitobject-using-namespace.hlsl | 36 ++ .../hitobject-without-namespace.hlsl | 39 ++ .../maybereorderthread-without-namespace.hlsl | 31 ++ utils/hct/gen_intrin_main.txt | 11 + utils/hct/hctdb.py | 39 +- utils/hct/hctdb_instrhelp.py | 12 +- utils/hct/hlsl_intrinsic_opcodes.json | 6 +- 37 files changed, 1174 insertions(+), 158 deletions(-) create mode 100644 tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll create mode 100644 tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index 490f335db5..5652c56f50 100644 --- 
a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -162,6 +162,8 @@ GetHLSLResourceProperties(llvm::Type *Ty); bool IsHLSLResourceType(llvm::Type *Ty); bool IsHLSLObjectType(llvm::Type *Ty); bool IsHLSLRayQueryType(llvm::Type *Ty); +llvm::Type *GetHLSLHitObjectType(llvm::Module *M); +bool IsHLSLHitObjectType(llvm::Type *Ty); bool IsHLSLResourceDescType(llvm::Type *Ty); bool IsResourceSingleComponent(llvm::Type *Ty); uint8_t GetResourceComponentCount(llvm::Type *Ty); diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 41c72d1a51..90f3fafd79 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -333,6 +333,8 @@ enum class IntrinsicOp { MOP_TraceRayInline = 325, MOP_WorldRayDirection = 326, MOP_WorldRayOrigin = 327, + MOP_DxHitObject_MakeNop = 358, + IOP_DxMaybeReorderThread = 359, MOP_Count = 328, MOP_FinishedCrossGroupSharing = 329, MOP_GetGroupNodeOutputRecords = 330, @@ -364,7 +366,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 358, + Num_Intrinsics = 360, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index 4b8e237201..bf8a040673 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -126,7 +126,9 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS = 49, LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS = 50, - LICOMPTYPE_COUNT = 51 + LICOMPTYPE_HIT_OBJECT = 51, + + LICOMPTYPE_COUNT = 52 }; static const BYTE IA_SPECIAL_BASE = 0xf0; @@ -164,6 +166,7 @@ struct HLSL_INTRINSIC_ARGUMENT { static const UINT INTRIN_FLAG_READ_ONLY = 1U << 0; static const UINT INTRIN_FLAG_READ_NONE = 1U << 1; static const UINT INTRIN_FLAG_IS_WAVE = 1U << 2; +static const UINT INTRIN_FLAG_STATIC_MEMBER = 1U << 3; struct HLSL_INTRINSIC { UINT Op; // Intrinsic Op ID diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 865fad487c..0a4fb1160a 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -574,6 +574,9 @@ bool IsHLSLObjectType(llvm::Type *Ty) { if (IsHLSLNodeIOType(Ty)) return true; + + if (IsHLSLHitObjectType(Ty)) + return true; } return false; } @@ -591,6 +594,24 @@ bool IsHLSLRayQueryType(llvm::Type *Ty) { return false; } +llvm::Type *GetHLSLHitObjectType(llvm::Module *M) { + using namespace llvm; + StructType *HitObjectTy = M->getTypeByName("dx.types.HitObject"); + if (!HitObjectTy) + HitObjectTy = StructType::create({Type::getInt8PtrTy(M->getContext(), 0)}, + "dx.types.HitObject", false); + return HitObjectTy; +} + +bool IsHLSLHitObjectType(llvm::Type *Ty) { + llvm::StructType *ST = dyn_cast(Ty); + if (!ST) + return false; + if (!ST->hasName()) + return false; + return ST->getName() == "dx.types.HitObject"; +} + bool IsHLSLResourceDescType(llvm::Type *Ty) { if (llvm::StructType *ST = dyn_cast(Ty)) { if (!ST->hasName()) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 5a0dadf7f4..3ab1f9fdec 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6062,6 +6062,24 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // namespace +// Shader Execution Reordering. 
+namespace { +Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, + OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return nullptr; // TODO: Merge SER DXIL patches +} +} // namespace + // Resource Handle. namespace { Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP, @@ -6794,6 +6812,12 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, + DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil + // opcode + {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, + DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil + // opcode }; } // namespace static_assert( diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index e6a50de8fb..3b517576fe 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -391,6 +391,7 @@ clang::CXXRecordDecl * DeclareConstantBufferViewType(clang::ASTContext &context, clang::InheritableAttr *Attr); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); +clang::CXXRecordDecl *DeclareHitObjectType(clang::NamespaceDecl &NSDecl); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); @@ -472,6 +473,7 @@ bool IsHLSLNodeInputType(clang::QualType type); bool IsHLSLDynamicResourceType(clang::QualType type); bool IsHLSLDynamicSamplerType(clang::QualType type); bool IsHLSLNodeType(clang::QualType type); +bool IsHLSLHitObjectType(clang::QualType type); bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type); bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type); @@ -545,6 +547,7 @@ clang::CXXMethodDecl *CreateObjectFunctionDeclarationWithParams( clang::QualType resultType, llvm::ArrayRef paramTypes, llvm::ArrayRef paramNames, clang::DeclarationName declarationName, bool isConst, + clang::StorageClass SC = clang::StorageClass::SC_None, bool isTemplateFunction = false); DXIL::ResourceClass GetResourceClassForType(const clang::ASTContext &context, diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 3afbaa91c7..48193f7077 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -1157,6 +1157,14 @@ def HLSLRayQueryObject : InheritableAttr { let Documentation = [Undocumented]; } +// HLSL HitObject Attribute + +def HLSLHitObject : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + // HLSL Parameter Attributes def HLSLMaxRecords : InheritableAttr { diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 16ff7777a7..6ae59cac14 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7665,6 +7665,9 @@ def err_hlsl_unsupported_builtin_op: Error< def warn_hlsl_builtin_constant_unavailable: Warning< "potential misuse of built-in constant %0 in shader model %1; introduced" " in shader model %2">, InGroup; +def warn_hlsl_builtin_type_unavailable: Warning< + "potential misuse of built-in type %0 in shader model %1; introduced" + " in shader model %2">, DefaultError, InGroup; def err_hlsl_unsupported_char_literal : Error< "unsupported style of char literal - use a single-character char-based literal">; def err_hlsl_unsupported_clipplane_argument_expression : Error< @@ -7991,6 +7994,12 @@ def warn_hlsl_legacy_integer_literal_signedness: Warning< InGroup, DefaultIgnore; def err_hlsl_unsupported_semantic_index: Error< "'%0' is defined with semantic index %1, but only values 0 through %2 are supported">; + +// Shader Execution Reordering +def err_hlsl_reorder_unsupported_stage : Error< + "dx::MaybeReorderThread is unavailable in shader stage '%0' (requires 'raygeneration')">; +def err_hlsl_hitobject_unsupported_stage : Error< + "dx::HitObject is unavailable in shader stage '%0' (requires 'raygeneration', 'closesthit' or 'miss')">; // HLSL Change Ends // SPIRV Change Starts diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 1b6c346acd..dcd3e89e9a 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -23,6 +23,7 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/HlslBuiltinTypeDeclBuilder.h" #include "clang/AST/TypeLoc.h" +#include "clang/Basic/Specifiers.h" #include "clang/Sema/Overload.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -1070,7 +1071,7 @@ static void CreateConstructorDeclaration( static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, - CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { + StorageClass SC, CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(functionDecl != nullptr); @@ -1082,8 +1083,8 @@ static void CreateObjectFunctionDeclaration( *tinfo = context.getTrivialTypeSourceInfo(functionQT, NoLoc); DXASSERT_NOMSG(*tinfo != nullptr); *functionDecl = CXXMethodDecl::Create( - context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, - StorageClass::SC_None, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, SC, + InlineSpecifiedFalse, IsConstexprFalse, NoLoc); DXASSERT_NOMSG(*functionDecl != nullptr); (*functionDecl)->setLexicalDeclContext(recordDecl); (*functionDecl)->setAccess(AccessSpecifier::AS_public); @@ -1092,7 +1093,8 @@ static void CreateObjectFunctionDeclaration( CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef paramTypes, ArrayRef paramNames, - DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DeclarationName declarationName, 
bool isConst, StorageClass SC, + bool isTemplateFunction) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(!resultType.isNull()); DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); @@ -1100,7 +1102,7 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( TypeSourceInfo *tinfo; CXXMethodDecl *functionDecl; CreateObjectFunctionDeclaration(context, recordDecl, resultType, paramTypes, - declarationName, isConst, &functionDecl, + declarationName, isConst, SC, &functionDecl, &tinfo); // Create and associate parameters to method. @@ -1215,6 +1217,46 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { return typeDeclBuilder.getRecordDecl(); } +CXXRecordDecl *hlsl::DeclareHitObjectType(NamespaceDecl &NSDecl) { + ASTContext &Context = NSDecl.getASTContext(); + // HitObject { ... } + BuiltinTypeDeclBuilder TypeDeclBuilder(&NSDecl, "HitObject"); + TypeDeclBuilder.startDefinition(); + + // Add handle to mark as HLSL object. + TypeDeclBuilder.addField("h", GetHLSLObjectHandleType(Context)); + CXXRecordDecl *RecordDecl = TypeDeclBuilder.getRecordDecl(); + + CanQualType canQualType = Context.getCanonicalType( + Context.getRecordType(TypeDeclBuilder.getRecordDecl())); + + // Add constructor that will be lowered to MOP_HitObject_MakeNop. + CXXConstructorDecl *pConstructorDecl = nullptr; + TypeSourceInfo *pTypeSourceInfo = nullptr; + CreateConstructorDeclaration( + Context, RecordDecl, Context.VoidTy, {}, + Context.DeclarationNames.getCXXConstructorName(canQualType), false, + &pConstructorDecl, &pTypeSourceInfo); + RecordDecl->addDecl(pConstructorDecl); + pConstructorDecl->addAttr(HLSLIntrinsicAttr::CreateImplicit( + Context, "op", "", + static_cast(hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop))); + pConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(Context)); + + // Add AvailabilityAttribute for SM6.9+ + VersionTuple VT69 = VersionTuple(6, 9); + RecordDecl->addAttr(ConstructAvailabilityAttribute(Context, VT69)); + + // Add the implicit HLSLHitObjectAttr attribute to unambiguously recognize the + // builtin HitObject type. + RecordDecl->addAttr(HLSLHitObjectAttr::CreateImplicit(Context)); + RecordDecl->setImplicit(true); + + // Add to namespace + RecordDecl->setDeclContext(&NSDecl); + return RecordDecl; +} + CXXRecordDecl *hlsl::DeclareResourceType(ASTContext &context, bool bSampler) { // struct ResourceDescriptor { uint8 desc; } StringRef Name = bSampler ? ".Sampler" : ".Resource"; diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 630e969881..8f9460ce63 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -507,6 +507,10 @@ bool IsHLSLResourceType(clang::QualType type) { return false; } +bool IsHLSLHitObjectType(QualType type) { + return nullptr != getAttr(type); +} + DXIL::NodeIOKind GetNodeIOType(clang::QualType type) { if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return Attr->getNodeIOType(); diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 29ed954425..b041db95a7 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -2500,9 +2500,11 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { // Type annotation for this pointer. 
if (const CXXMethodDecl *MFD = dyn_cast(FD)) { - const CXXRecordDecl *RD = MFD->getParent(); - QualType Ty = CGM.getContext().getTypeDeclType(RD); - AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + if (!MFD->isStatic()) { + const CXXRecordDecl *RD = MFD->getParent(); + QualType Ty = CGM.getContext().getTypeDeclType(RD); + AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + } } for (const ValueDecl *param : FD->params()) { diff --git a/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/tools/clang/lib/CodeGen/CodeGenTypes.cpp index d11575d359..82328c8fb5 100644 --- a/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -14,21 +14,23 @@ #include "CodeGenTypes.h" #include "CGCXXABI.h" #include "CGCall.h" +#include "CGHLSLRuntime.h" // HLSL Change #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenModule.h" // HLSL Change #include "TargetInfo.h" +#include "dxc/DXIL/DxilUtil.h" // HLSL Change #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" -#include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" // HLSL Change - clang-format #include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" // HLSL Change #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" -#include "CodeGenModule.h" // HLSL Change -#include "CGHLSLRuntime.h" // HLSL Change using namespace clang; using namespace CodeGen; @@ -365,7 +367,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { .getConstantArrayType(eltTy, llvm::APInt(32, count), ArrayType::ArraySizeModifier::Normal, 0) .getTypePtr(); - } + } else if (hlsl::IsHLSLHitObjectType(T)) // HLSL Change + return hlsl::dxilutil::GetHLSLHitObjectType(&TheModule); else return ConvertRecordDeclType(RT->getDecl()); } diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index c8c762a0a1..507b6a7508 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -2787,13 +2787,18 @@ bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, // Never if a scope specifier was provided. if (SS.isSet()) { // HLSL Change begins - // We want to be able to have intrinsics inside the "vk" namespace. + // We want to be able to have intrinsics inside the "vk" and "dx" + // namespaces. 
const bool isVkNamespace = SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && SS.getScopeRep()->getAsNamespace()->getName() == "vk"; - if (!isVkNamespace) - // HLSL Change ends + const bool isDxNamespace = + SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && + SS.getScopeRep()->getAsNamespace()->getName() == "dx"; + + if (!isVkNamespace && !isDxNamespace) + // HLSL Change ends return false; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 66cbea12ce..40010b1596 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -14,6 +14,7 @@ #include "VkConstantsTables.h" #include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" @@ -31,6 +32,8 @@ #include "clang/AST/HlslTypes.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/Specifiers.h" +#include "clang/Parse/ParseDiagnostic.h" #include "clang/Sema/ExternalSemaSource.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" @@ -40,6 +43,7 @@ #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -243,6 +247,9 @@ enum ArBasicKind { AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT, + AR_BASIC_MAXIMUM_COUNT }; @@ -593,6 +600,9 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + LICOMPTYPE_HIT_OBJECT, // AR_OBJECT_HIT_OBJECT, + // AR_BASIC_MAXIMUM_COUNT }; @@ -1218,6 +1228,10 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_BASIC_UNKNOWN}; +// Shader Execution Reordering +static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, + AR_BASIC_UNKNOWN}; + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1272,6 +1286,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_AnyOutputRecordCT, // LICOMPTYPE_ANY_NODE_OUTPUT_RECORD g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS + g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1360,7 +1375,10 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_NODE_OUTPUT, AR_OBJECT_EMPTY_NODE_OUTPUT, AR_OBJECT_NODE_OUTPUT_ARRAY, AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY, - AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS}; + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT}; // Count of template arguments for basic kind of objects that look like // templates (one or more type arguments). 
@@ -1476,6 +1494,9 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, 1, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + 0, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == @@ -1622,76 +1643,176 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {1, MipsFalse, SampleFalse}, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS {1, MipsFalse, SampleFalse}, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + {0, MipsFalse, SampleFalse}, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsSubscripts)); // Type names for ArBasicKind values. static const char *g_ArBasicTypeNames[] = { - "bool", "float", "half", "half", "float", "double", "int", "sbyte", "byte", - "short", "ushort", "int", "uint", "long", "ulong", "min10float", - "min16float", "min12int", "min16int", "min16uint", "int8_t4_packed", - "uint8_t4_packed", "enum", - - "", "", "", "", "", "", + "bool", + "float", + "half", + "half", + "float", + "double", + "int", + "sbyte", + "byte", + "short", + "ushort", + "int", + "uint", + "long", + "ulong", + "min10float", + "min16float", + "min12int", + "min16int", + "min16uint", + "int8_t4_packed", + "uint8_t4_packed", + "enum", + + "", + "", + "", + "", + "", + "", "enum class", - "null", "literal string", "string", + "null", + "literal string", + "string", // "texture", - "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", "Texture3D", - "TextureCube", "TextureCubeArray", "Texture2DMS", "Texture2DMSArray", - "SamplerState", "sampler1D", "sampler2D", "sampler3D", "samplerCUBE", - "SamplerComparisonState", "Buffer", "RenderTargetView", "DepthStencilView", - "ComputeShader", "DomainShader", "GeometryShader", "HullShader", - "PixelShader", "VertexShader", "pixelfragment", "vertexfragment", - "StateBlock", "Rasterizer", "DepthStencil", "Blend", "PointStream", - "LineStream", "TriangleStream", "InputPatch", "OutputPatch", "RWTexture1D", - "RWTexture1DArray", "RWTexture2D", "RWTexture2DArray", "RWTexture3D", - "RWBuffer", "ByteAddressBuffer", "RWByteAddressBuffer", "StructuredBuffer", - "RWStructuredBuffer", "RWStructuredBuffer(Incrementable)", - "RWStructuredBuffer(Decrementable)", "AppendStructuredBuffer", + "Texture1D", + "Texture1DArray", + "Texture2D", + "Texture2DArray", + "Texture3D", + "TextureCube", + "TextureCubeArray", + "Texture2DMS", + "Texture2DMSArray", + "SamplerState", + "sampler1D", + "sampler2D", + "sampler3D", + "samplerCUBE", + "SamplerComparisonState", + "Buffer", + "RenderTargetView", + "DepthStencilView", + "ComputeShader", + "DomainShader", + "GeometryShader", + "HullShader", + "PixelShader", + "VertexShader", + "pixelfragment", + "vertexfragment", + "StateBlock", + "Rasterizer", + "DepthStencil", + "Blend", + "PointStream", + "LineStream", + "TriangleStream", + "InputPatch", + "OutputPatch", + "RWTexture1D", + "RWTexture1DArray", + "RWTexture2D", + "RWTexture2DArray", + "RWTexture3D", + "RWBuffer", + "ByteAddressBuffer", + "RWByteAddressBuffer", + "StructuredBuffer", + "RWStructuredBuffer", + "RWStructuredBuffer(Incrementable)", + "RWStructuredBuffer(Decrementable)", + "AppendStructuredBuffer", "ConsumeStructuredBuffer", - "ConstantBuffer", "TextureBuffer", + "ConstantBuffer", + "TextureBuffer", - "RasterizerOrderedBuffer", "RasterizerOrderedByteAddressBuffer", - "RasterizerOrderedStructuredBuffer", "RasterizerOrderedTexture1D", - "RasterizerOrderedTexture1DArray", 
"RasterizerOrderedTexture2D", - "RasterizerOrderedTexture2DArray", "RasterizerOrderedTexture3D", + "RasterizerOrderedBuffer", + "RasterizerOrderedByteAddressBuffer", + "RasterizerOrderedStructuredBuffer", + "RasterizerOrderedTexture1D", + "RasterizerOrderedTexture1DArray", + "RasterizerOrderedTexture2D", + "RasterizerOrderedTexture2DArray", + "RasterizerOrderedTexture3D", - "FeedbackTexture2D", "FeedbackTexture2DArray", + "FeedbackTexture2D", + "FeedbackTexture2DArray", // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN - "SubpassInput", "SubpassInputMS", "SpirvType", "SpirvOpaqueType", - "integral_constant", "Literal", "ext_type", "ext_result_id", + "SubpassInput", + "SubpassInputMS", + "SpirvType", + "SpirvOpaqueType", + "integral_constant", + "Literal", + "ext_type", + "ext_result_id", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends "", - "deprecated effect object", "wave_t", "RayDesc", - "RaytracingAccelerationStructure", "user defined type", + "deprecated effect object", + "wave_t", + "RayDesc", + "RaytracingAccelerationStructure", + "user defined type", "BuiltInTriangleIntersectionAttributes", // subobjects - "StateObjectConfig", "GlobalRootSignature", "LocalRootSignature", - "SubobjectToExportsAssociation", "RaytracingShaderConfig", - "RaytracingPipelineConfig", "TriangleHitGroup", - "ProceduralPrimitiveHitGroup", "RaytracingPipelineConfig1", - - "RayQuery", "HEAP_Resource", "HEAP_Sampler", - - "RWTexture2DMS", "RWTexture2DMSArray", + "StateObjectConfig", + "GlobalRootSignature", + "LocalRootSignature", + "SubobjectToExportsAssociation", + "RaytracingShaderConfig", + "RaytracingPipelineConfig", + "TriangleHitGroup", + "ProceduralPrimitiveHitGroup", + "RaytracingPipelineConfig1", + + "RayQuery", + "HEAP_Resource", + "HEAP_Sampler", + + "RWTexture2DMS", + "RWTexture2DMSArray", // Workgraphs - "EmptyNodeInput", "DispatchNodeInputRecord", "RWDispatchNodeInputRecord", - "GroupNodeInputRecords", "RWGroupNodeInputRecords", "ThreadNodeInputRecord", + "EmptyNodeInput", + "DispatchNodeInputRecord", + "RWDispatchNodeInputRecord", + "GroupNodeInputRecords", + "RWGroupNodeInputRecords", + "ThreadNodeInputRecord", "RWThreadNodeInputRecord", - "NodeOutput", "EmptyNodeOutput", "NodeOutputArray", "EmptyNodeOutputArray", + "NodeOutput", + "EmptyNodeOutput", + "NodeOutputArray", + "EmptyNodeOutputArray", - "ThreadNodeOutputRecords", "GroupNodeOutputRecords"}; + "ThreadNodeOutputRecords", + "GroupNodeOutputRecords", + + // Shader Execution Reordering + "HitObject", +}; C_ASSERT(_countof(g_ArBasicTypeNames) == AR_BASIC_MAXIMUM_COUNT); @@ -1731,6 +1852,10 @@ static const char *g_DeprecatedEffectObjectNames[] = { "RenderTargetView", // 16 }; +static bool IsStaticMember(const HLSL_INTRINSIC *fn) { + return fn->Flags & INTRIN_FLAG_STATIC_MEMBER; +} + static bool IsVariadicIntrinsicFunction(const HLSL_INTRINSIC *fn) { return fn->pArgs[fn->uNumArgs - 1].uTemplateId == INTRIN_TEMPLATE_VARARGS; } @@ -1816,15 +1941,13 @@ static void AddHLSLIntrinsicAttr(FunctionDecl *FD, ASTContext &context, FD->addAttr(PureAttr::CreateImplicit(context)); if (pIntrinsic->Flags & INTRIN_FLAG_IS_WAVE) FD->addAttr(HLSLWaveSensitiveAttr::CreateImplicit(context)); - // TBD: Add availability attribute if MinShaderModel is set. 
- // if (pIntrinsic->MinShaderModel) { - // unsigned Major = pIntrinsic->MinShaderModel >> 4; - // unsigned Minor = pIntrinsic->MinShaderModel & 0xF; - // FD->addAttr(AvailabilityAttr::CreateImplicit( - // context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), - // clang::VersionTuple(), clang::VersionTuple(), false, - // "HLSL Intrinsic availability limited by shader model.")); - //} + if (pIntrinsic->MinShaderModel) { + unsigned Major = pIntrinsic->MinShaderModel >> 4; + unsigned Minor = pIntrinsic->MinShaderModel & 0xF; + FD->addAttr(AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), + clang::VersionTuple(), clang::VersionTuple(), false, "")); + } } static FunctionDecl * @@ -1870,12 +1993,14 @@ AddHLSLIntrinsicFunction(ASTContext &context, NamespaceDecl *NS, const QualType fnReturnType = functionArgQualTypes[0]; std::vector fnArgTypes(functionArgQualTypes.begin() + 1, functionArgQualTypes.end()); + + StorageClass SC = IsStaticMember(pIntrinsic) ? SC_Static : SC_Extern; QualType functionType = context.getFunctionType(fnReturnType, fnArgTypes, protoInfo, paramMods); FunctionDecl *functionDecl = FunctionDecl::Create( context, currentDeclContext, NoLoc, - DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, - StorageClass::SC_Extern, InlineSpecifiedFalse, HasWrittenPrototypeTrue); + DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, SC, + InlineSpecifiedFalse, HasWrittenPrototypeTrue); currentDeclContext->addDecl(functionDecl); functionDecl->setLexicalDeclContext(currentDeclContext); @@ -2284,6 +2409,10 @@ static void GetIntrinsicMethods(ArBasicKind kind, *intrinsics = g_RayQueryMethods; *intrinsicCount = _countof(g_RayQueryMethods); break; + case AR_OBJECT_HIT_OBJECT: + *intrinsics = g_DxHitObjectMethods; + *intrinsicCount = _countof(g_DxHitObjectMethods); + break; case AR_OBJECT_RWTEXTURE2DMS: *intrinsics = g_RWTexture2DMSMethods; *intrinsicCount = _countof(g_RWTexture2DMSMethods); @@ -2846,6 +2975,9 @@ class HLSLExternalSource : public ExternalSemaSource { // Namespace decl for Vulkan-specific intrinsic functions NamespaceDecl *m_vkNSDecl; + // Namespace decl for dx intrinsic functions + NamespaceDecl *m_dxNSDecl; + // Context being processed. ASTContext *m_context; @@ -3063,10 +3195,13 @@ class HLSLExternalSource : public ExternalSemaSource { IdentifierInfo *ii = &m_context->Idents.get(StringRef(intrinsic->pArgs[0].pName)); DeclarationName declarationName = DeclarationName(ii); + + StorageClass SC = IsStaticMember(intrinsic) ? 
SC_Static : SC_None; + CXXMethodDecl *functionDecl = CreateObjectFunctionDeclarationWithParams( *m_context, recordDecl, functionResultQT, ArrayRef(argsQTs, numParams), - ArrayRef(argNames, numParams), declarationName, true, + ArrayRef(argNames, numParams), declarationName, true, SC, templateParamNamedDeclsCount > 0); functionDecl->setImplicit(true); @@ -3268,7 +3403,7 @@ class HLSLExternalSource : public ExternalSemaSource { *m_context, recordDecl, resultType, ArrayRef(indexType), ArrayRef(StringRef("index")), m_context->DeclarationNames.getCXXOperatorName(OO_Subscript), true, - true); + StorageClass::SC_None, true); hlsl::CreateFunctionTemplateDecl( *m_context, recordDecl, functionDecl, reinterpret_cast(&templateTypeParmDecl), 1); @@ -3312,9 +3447,8 @@ class HLSLExternalSource : public ExternalSemaSource { return -1; } -#ifdef ENABLE_SPIRV_CODEGEN - SmallVector CreateTemplateTypeParmDeclsForVkIntrinsicFunction( - const HLSL_INTRINSIC *intrinsic) { + SmallVector CreateTemplateTypeParmDeclsForIntrinsicFunction( + const HLSL_INTRINSIC *intrinsic, NamespaceDecl *nsDecl) { SmallVector templateTypeParmDecls; auto &context = m_sema->getASTContext(); const HLSL_INTRINSIC_ARGUMENT *pArgs = intrinsic->pArgs; @@ -3325,9 +3459,8 @@ class HLSLExternalSource : public ExternalSemaSource { pArgs[i].uLegalTemplates == LITEMPLATE_ANY) { IdentifierInfo *id = &context.Idents.get("T"); TemplateTypeParmDecl *templateTypeParmDecl = - TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, - 0, id, TypenameTrue, - ParameterPackFalse); + TemplateTypeParmDecl::Create(context, nsDecl, NoLoc, NoLoc, 0, 0, + id, TypenameTrue, ParameterPackFalse); if (TInfo == nullptr) { TInfo = m_sema->getASTContext().CreateTypeSourceInfo( m_context->UnsignedIntTy, 0); @@ -3341,7 +3474,7 @@ class HLSLExternalSource : public ExternalSemaSource { } SmallVector - CreateParmDeclsForVkIntrinsicFunction( + CreateParmDeclsForIntrinsicFunction( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl ¶mTypes, const SmallVectorImpl ¶mMods) { @@ -3366,7 +3499,7 @@ class HLSLExternalSource : public ExternalSemaSource { return paramDecls; } - SmallVector VkIntrinsicFunctionParamTypes( + SmallVector getIntrinsicFunctionParamTypes( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl &templateTypeParmDecls) { auto &context = m_sema->getASTContext(); @@ -3401,8 +3534,11 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_VOID: paramTypes.push_back(context.VoidTy); break; + case LICOMPTYPE_HIT_OBJECT: + paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); + break; default: - DXASSERT(false, "Argument type of vk:: intrinsic function is not " + DXASSERT(false, "Argument type of intrinsic function is not " "supported"); break; } @@ -3410,9 +3546,9 @@ class HLSLExternalSource : public ExternalSemaSource { return paramTypes; } - QualType - VkIntrinsicFunctionType(const SmallVectorImpl ¶mTypes, - const SmallVectorImpl ¶mMods) { + QualType getIntrinsicFunctionType( + const SmallVectorImpl ¶mTypes, + const SmallVectorImpl ¶mMods) { DXASSERT(!paramTypes.empty(), "Given param type vector is empty"); ArrayRef params({}); @@ -3425,7 +3561,7 @@ class HLSLExternalSource : public ExternalSemaSource { EmptyEPI, paramMods); } - void SetParmDeclsForVkIntrinsicFunction( + void SetParmDeclsForIntrinsicFunction( TypeSourceInfo *TInfo, FunctionDecl *functionDecl, const SmallVectorImpl ¶mDecls) { FunctionProtoTypeLoc Proto = @@ -3440,47 +3576,39 @@ class HLSLExternalSource : public ExternalSemaSource { 
functionDecl->setParams(paramDecls); } - // Adds intrinsic function declarations to the "vk" namespace. - // It does so only if SPIR-V code generation is being done. - // Assumes the implicit "vk" namespace has already been created. - void AddVkIntrinsicFunctions() { - // If not doing SPIR-V CodeGen, return. - if (!m_sema->getLangOpts().SPIRV) - return; - - DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); - + void AddIntrinsicFunctionsToNamespace(const HLSL_INTRINSIC *table, + uint32_t tableSize, + NamespaceDecl *nsDecl) { auto &context = m_sema->getASTContext(); - for (uint32_t i = 0; i < _countof(g_VkIntrinsics); ++i) { - const HLSL_INTRINSIC *intrinsic = &g_VkIntrinsics[i]; + for (uint32_t i = 0; i < tableSize; ++i) { + const HLSL_INTRINSIC *intrinsic = &table[i]; const IdentifierInfo &fnII = context.Idents.get( intrinsic->pArgs->pName, tok::TokenKind::identifier); DeclarationName functionName(&fnII); // Create TemplateTypeParmDecl. SmallVector templateTypeParmDecls = - CreateTemplateTypeParmDeclsForVkIntrinsicFunction(intrinsic); + CreateTemplateTypeParmDeclsForIntrinsicFunction(intrinsic, nsDecl); // Get types for parameters. SmallVector paramTypes = - VkIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); + getIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); SmallVector paramMods; InitParamMods(intrinsic, paramMods); // Create FunctionDecl. - QualType fnType = VkIntrinsicFunctionType(paramTypes, paramMods); + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; + QualType fnType = getIntrinsicFunctionType(paramTypes, paramMods); TypeSourceInfo *TInfo = m_sema->getASTContext().CreateTypeSourceInfo(fnType, 0); FunctionDecl *functionDecl = FunctionDecl::Create( - context, m_vkNSDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), - fnType, TInfo, StorageClass::SC_Extern, InlineSpecifiedFalse, - HasWrittenPrototypeTrue); + context, nsDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), + fnType, TInfo, SC, InlineSpecifiedFalse, HasWrittenPrototypeTrue); // Create and set ParmVarDecl. 
SmallVector paramDecls = - CreateParmDeclsForVkIntrinsicFunction(intrinsic, paramTypes, - paramMods); - SetParmDeclsForVkIntrinsicFunction(TInfo, functionDecl, paramDecls); + CreateParmDeclsForIntrinsicFunction(intrinsic, paramTypes, paramMods); + SetParmDeclsForIntrinsicFunction(TInfo, functionDecl, paramDecls); if (!templateTypeParmDecls.empty()) { TemplateParameterList *templateParmList = TemplateParameterList::Create( @@ -3488,22 +3616,52 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.size(), NoLoc); functionDecl->setTemplateParameterListsInfo(context, 1, &templateParmList); - FunctionTemplateDecl *functionTemplate = FunctionTemplateDecl::Create( - context, m_vkNSDecl, NoLoc, functionName, templateParmList, - functionDecl); + FunctionTemplateDecl *functionTemplate = + FunctionTemplateDecl::Create(context, nsDecl, NoLoc, functionName, + templateParmList, functionDecl); functionDecl->setDescribedFunctionTemplate(functionTemplate); - m_vkNSDecl->addDecl(functionTemplate); - functionTemplate->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionTemplate); + functionTemplate->setDeclContext(nsDecl); } else { - m_vkNSDecl->addDecl(functionDecl); - functionDecl->setLexicalDeclContext(m_vkNSDecl); - functionDecl->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionDecl); + functionDecl->setLexicalDeclContext(nsDecl); + functionDecl->setDeclContext(nsDecl); } functionDecl->setImplicit(true); } } + // Adds intrinsic function declarations to the "dx" namespace. + // Assumes the implicit "vk" namespace has already been created. + void AddDxIntrinsicFunctions() { + DXASSERT(m_dxNSDecl, "caller has not created the dx namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_DxIntrinsics, _countof(g_DxIntrinsics), + m_dxNSDecl); + // Eagerly declare HitObject methods. This is required to make lookup of + // 'static' HLSL member functions work without special-casing HLSL scope + // lookup. + CXXRecordDecl *HitObjectDecl = + GetBasicKindType(AR_OBJECT_HIT_OBJECT)->getAsCXXRecordDecl(); + CompleteType(HitObjectDecl); + } + +#ifdef ENABLE_SPIRV_CODEGEN + // Adds intrinsic function declarations to the "vk" namespace. + // It does so only if SPIR-V code generation is being done. + // Assumes the implicit "vk" namespace has already been created. + void AddVkIntrinsicFunctions() { + // If not doing SPIR-V CodeGen, return. + if (!m_sema->getLangOpts().SPIRV) + return; + + DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_VkIntrinsics, _countof(g_VkIntrinsics), + m_vkNSDecl); + } + // Adds implicitly defined Vulkan-specific constants to the "vk" namespace. // It does so only if SPIR-V code generation is being done. // Assumes the implicit "vk" namespace has already been created. @@ -3619,6 +3777,10 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_RAY_QUERY) { recordDecl = DeclareRayQueryType(*m_context); + } else if (kind == AR_OBJECT_HIT_OBJECT) { + // Declare 'HitObject' in '::dx' extension namespace. 
+ DXASSERT(m_dxNSDecl, "namespace ::dx must be declared in SM6.9+"); + recordDecl = DeclareHitObjectType(*m_dxNSDecl); } else if (kind == AR_OBJECT_HEAP_RESOURCE) { recordDecl = DeclareResourceType(*m_context, /*bSampler*/ false); if (SM->IsSM66Plus()) { @@ -3866,8 +4028,8 @@ class HLSLExternalSource : public ExternalSemaSource { : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), - m_vkNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), - m_hlslStringTypedef(nullptr) { + m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), + m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); memset(m_matrixShorthandTypes, 0, sizeof(m_matrixShorthandTypes)); memset(m_vectorTypes, 0, sizeof(m_vectorTypes)); @@ -3896,6 +4058,14 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema = &S; S.addExternalSource(this); + m_dxNSDecl = + NamespaceDecl::Create(context, context.getTranslationUnitDecl(), + /*Inline*/ false, SourceLocation(), + SourceLocation(), &context.Idents.get("dx"), + /*PrevDecl*/ nullptr); + m_dxNSDecl->setImplicit(); + context.getTranslationUnitDecl()->addDecl(m_dxNSDecl); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Create the "vk" namespace which contains Vulkan-specific intrinsics. @@ -3914,6 +4084,8 @@ class HLSLExternalSource : public ExternalSemaSource { AddIntrinsicTableMethods(intrinsic); } + AddDxIntrinsicFunctions(); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Add Vulkan-specific intrinsics. @@ -4596,6 +4768,7 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_WAVE: case AR_OBJECT_ACCELERATION_STRUCT: case AR_OBJECT_RAY_DESC: + case AR_OBJECT_HIT_OBJECT: case AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES: case AR_OBJECT_RWTEXTURE2DMS: case AR_OBJECT_RWTEXTURE2DMS_ARRAY: @@ -4919,12 +5092,18 @@ class HLSLExternalSource : public ExternalSemaSource { ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; + const bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + // Intrinsics live in the global namespace, so references to their names // should be either unqualified or '::'-prefixed. - // Exception: Vulkan-specific intrinsics live in the 'vk::' namespace. - if (isQualified && !isGlobalNamespace && !isVkNamespace) { + // Exceptions: + // - Vulkan-specific intrinsics live in the 'vk::' namespace. + // - DirectX-specific intrinsics live in the 'dx::' namespace. 
+ if (isQualified && !isGlobalNamespace && !isVkNamespace && !isDxNamespace) return false; - } const DeclarationNameInfo declName = ULE->getNameInfo(); IdentifierInfo *idInfo = declName.getName().getAsIdentifierInfo(); @@ -4935,6 +5114,10 @@ class HLSLExternalSource : public ExternalSemaSource { StringRef nameIdentifier = idInfo->getName(); const HLSL_INTRINSIC *table = g_Intrinsics; auto tableCount = _countof(g_Intrinsics); + if (isDxNamespace) { + table = g_DxIntrinsics; + tableCount = _countof(g_DxIntrinsics); + } #ifdef ENABLE_SPIRV_CODEGEN if (isVkNamespace) { table = g_VkIntrinsics; @@ -4971,11 +5154,16 @@ class HLSLExternalSource : public ExternalSemaSource { m_usedIntrinsics.insert(UsedIntrinsic(pIntrinsic, functionArgTypes)); bool insertedNewValue = insertResult.second; if (insertedNewValue) { + NamespaceDecl *nsDecl = m_hlslNSDecl; + if (isVkNamespace) + nsDecl = m_vkNSDecl; + else if (isDxNamespace) + nsDecl = m_dxNSDecl; DXASSERT(tableName, "otherwise IDxcIntrinsicTable::GetTableName() failed"); - intrinsicFuncDecl = AddHLSLIntrinsicFunction( - *m_context, isVkNamespace ? m_vkNSDecl : m_hlslNSDecl, tableName, - lowering, pIntrinsic, &functionArgTypes); + intrinsicFuncDecl = + AddHLSLIntrinsicFunction(*m_context, nsDecl, tableName, lowering, + pIntrinsic, &functionArgTypes); insertResult.first->setFunctionDecl(intrinsicFuncDecl); } else { intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); @@ -5742,11 +5930,12 @@ class HLSLExternalSource : public ExternalSemaSource { Params.push_back(paramDecl); } + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; QualType T = TInfo->getType(); DeclarationNameInfo NameInfo(FunctionTemplate->getDeclName(), NoLoc); CXXMethodDecl *method = CXXMethodDecl::Create( *m_context, dyn_cast(owner), NoLoc, NameInfo, T, TInfo, - SC_Extern, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + SC, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); // Add intrinsic attr AddHLSLIntrinsicAttr(method, *m_context, tableName, lowering, intrinsic); @@ -8007,7 +8196,8 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( DXASSERT_NOMSG(initSequence != nullptr); // In HLSL there are no default initializers, eg float4x4 m(); - // Except for RayQuery constructor (also handle InitializationKind::IK_Value) + // Except for RayQuery and HitObject constructors (also handle + // InitializationKind::IK_Value) if (Kind.getKind() == InitializationKind::IK_Default || Kind.getKind() == InitializationKind::IK_Value) { QualType destBaseType = m_context->getBaseElementType(Entity.getType()); @@ -8018,7 +8208,9 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( GetRecordDeclForBuiltInOrStruct(typeRecordDecl)); DXASSERT(index != -1, "otherwise can't find type we already determined was an object"); - if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY) { + + if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY || + g_ArBasicKindsAsTypes[index] == AR_OBJECT_HIT_OBJECT) { CXXConstructorDecl *Constructor = *typeRecordDecl->ctor_begin(); initSequence->AddConstructorInitializationStep( Constructor, AccessSpecifier::AS_public, destBaseType, false, false, @@ -11650,6 +11842,35 @@ static bool isStringLiteral(QualType type) { return eType->isSpecificBuiltinType(BuiltinType::Char_S); } +static void DiagnoseReachableSERCall(Sema &S, CallExpr *CE, + DXIL::ShaderKind EntrySK, + const FunctionDecl *EntryDecl, + bool IsReorderOperation) { + bool ValidEntry = false; + switch (EntrySK) { + default: + break; + case DXIL::ShaderKind::ClosestHit: + case 
DXIL::ShaderKind::Miss: + ValidEntry = !IsReorderOperation; + break; + case DXIL::ShaderKind::RayGeneration: + ValidEntry = true; + break; + } + + if (ValidEntry) + return; + + int DiagID = IsReorderOperation ? diag::err_hlsl_reorder_unsupported_stage + : diag::err_hlsl_hitobject_unsupported_stage; + + SourceLocation EntryLoc = EntryDecl->getLocation(); + SourceLocation Loc = CE->getExprLoc(); + S.Diag(Loc, DiagID) << ShaderModel::FullNameFromKind(EntrySK); + S.Diag(EntryLoc, diag::note_hlsl_entry_defined_here); +} + // Check HLSL member call constraints for used functions. // locallyVisited is true if this call has been visited already from any other // entry function. Used to avoid duplicate diagnostics when not dependent on @@ -11690,6 +11911,12 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, case hlsl::IntrinsicOp::MOP_TraceRayInline: DiagnoseTraceRayInline(*this, CE); break; + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); + break; + case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, true); + break; default: break; } diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 827798a852..ed727af149 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -9,6 +9,7 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" #include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" @@ -16,12 +17,16 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/TypeLoc.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaHLSL.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -334,17 +339,19 @@ ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, return nullptr; } -class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor - : public RecursiveASTVisitor { +class HLSLReachableDiagnoseVisitor + : public RecursiveASTVisitor { public: - explicit HLSLCallDiagnoseVisitor( + explicit HLSLReachableDiagnoseVisitor( Sema *S, const hlsl::ShaderModel *SM, DXIL::ShaderKind EntrySK, DXIL::NodeLaunchType NodeLaunchTy, const FunctionDecl *EntryDecl, llvm::SmallPtrSetImpl &DiagnosedCalls, - llvm::SmallPtrSetImpl &DeclAvailabilityChecked) + llvm::SmallPtrSetImpl &DeclAvailabilityChecked, + llvm::SmallSet &DiagnosedTypeLocs) : sema(S), SM(SM), EntrySK(EntrySK), NodeLaunchTy(NodeLaunchTy), EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls), - DeclAvailabilityChecked(DeclAvailabilityChecked) {} + DeclAvailabilityChecked(DeclAvailabilityChecked), + DiagnosedTypeLocs(DiagnosedTypeLocs) {} bool VisitCallExpr(CallExpr *CE) { // Set flag if already diagnosed from another entry, allowing some @@ -401,16 +408,41 @@ class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor return true; } + bool VisitTypeLoc(TypeLoc TL) { + // Diagnose availability for used type. 
+ if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(TL)) { + UnqualTypeLoc UTL = TL.getUnqualifiedLoc(); + DiagnoseAvailability(AAttr, TL.getType(), UTL.getLocStart()); + } + + return true; + } + bool VisitDeclRefExpr(DeclRefExpr *DRE) { // Diagnose availability for referenced decl. if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(DRE)) { - NamedDecl *ND = DRE->getDecl(); - DiagnoseAvailability(AAttr, ND, DRE->getExprLoc()); + DiagnoseAvailability(AAttr, DRE->getDecl(), DRE->getExprLoc()); } return true; } + AvailabilityAttr *GetAvailabilityAttrOnce(TypeLoc TL) { + QualType Ty = TL.getType(); + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + if (!RD) + return nullptr; + AvailabilityAttr *AAttr = RD->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Type. + // Use the end location to avoid diagnosing the same type multiple times. + if (!DiagnosedTypeLocs.insert(TL.getEndLoc()).second) + return nullptr; + + return AAttr; + } + AvailabilityAttr *GetAvailabilityAttrOnce(DeclRefExpr *DRE) { AvailabilityAttr *AAttr = DRE->getDecl()->getAttr(); if (!AAttr) @@ -422,21 +454,36 @@ class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor return AAttr; } - void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + bool CheckSMVersion(VersionTuple AAttrVT) { + VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + return SMVT >= AAttrVT; + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, QualType Ty, SourceLocation Loc) { VersionTuple AAttrVT = AAttr->getIntroduced(); - VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + if (CheckSMVersion(AAttrVT)) + return; - // if the current shader model is lower than what - // is stated in the availability attribute, emit - // the availability warning. + sema->Diag(Loc, diag::warn_hlsl_builtin_type_unavailable) + << Ty << SM->GetName() << AAttrVT.getAsString(); + } - if (SMVT < AAttrVT) { - // TBD: Determine best way to distinguish between builtin constant decls - // and other decls. - sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) - << ND << SM->GetName() << AAttrVT.getAsString(); + void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + if (CheckSMVersion(AAttrVT)) + return; + + if (isa(ND)) { + sema->Diag(Loc, diag::warn_hlsl_intrinsic_in_wrong_shader_model) + << ND->getQualifiedNameAsString() << EntryDecl + << AAttrVT.getAsString(); + return; } + + sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) + << ND << SM->GetName() << AAttrVT.getAsString(); } clang::Sema *getSema() { return sema; } @@ -449,6 +496,7 @@ class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor const FunctionDecl *EntryDecl; llvm::SmallPtrSetImpl &DiagnosedCalls; llvm::SmallPtrSetImpl &DeclAvailabilityChecked; + llvm::SmallSet &DiagnosedTypeLocs; }; std::optional @@ -550,6 +598,8 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; llvm::SmallPtrSet DeclAvailabilityChecked; + llvm::SmallSet DiagnosedTypeLocs; + // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; @@ -671,11 +721,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { NodeLaunchTy = DXIL::NodeLaunchType::Broadcasting; } } + // Visit all visited functions in call graph to collect illegal intrinsic // calls. 
- HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, - FDecl, DiagnosedCalls, - DeclAvailabilityChecked); + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, EntrySK, NodeLaunchTy, FDecl, DiagnosedCalls, + DeclAvailabilityChecked, DiagnosedTypeLocs); // Visit globals with initializers when processing entry point. for (VarDecl *VD : InitGlobals) Visitor.TraverseDecl(VD); diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 650fe38adc..636eaf0213 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -10936,7 +10936,13 @@ bool Sema::buildOverloadedCallSet(Scope *S, Expr *Fn, ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; - assert((!ULE->getQualifier() || isVkNamespace) && "non-vk qualified name with ADL"); + bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + + assert((!ULE->getQualifier() || isVkNamespace || isDxNamespace) && + "expected vk or dx qualified name with ADL"); // HLSL Change Ends // We don't perform ADL for implicit declarations of builtins. diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll new file mode 100644 index 0000000000..01dafe5e86 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll @@ -0,0 +1,101 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; CHECK-NOT: @dx.op.hitObject_ +; CHECK-NOT: @dx.op.maybeReorderThread + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !24 ; line:10 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !24 ; line:10 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !24 ; line:10 col:3 + %4 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !25 ; line:11 col:1 + call void @llvm.lifetime.end(i64 4, i8* %4) #0, !dbg !25 ; line:11 col:1 + ret void, !dbg !25 ; line:11 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare 
void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 (ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 1, scope: !20) diff --git a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll new file mode 100644 index 0000000000..f5130bca3f --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll @@ -0,0 +1,106 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; CHECK-NOT: @dx.op.hitObject_ +; CHECK-NOT: @dx.op.maybeReorderThread + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %hit), !dbg !24 ; line:10 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, 
%dx.types.HitObject* %hit, i32 241, i32 3), !dbg !25 ; line:11 col:3 + call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7), !dbg !26 ; line:12 col:3 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !27 ; line:13 col:1 + call void @llvm.lifetime.end(i64 4, i8* %2) #0, !dbg !27 ; line:13 col:1 + ret void, !dbg !27 ; line:13 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32, %dx.types.HitObject*, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 3, scope: !20) +!26 = !DILocation(line: 12, column: 3, scope: !20) +!27 = !DILocation(line: 13, column: 1, scope: !20) diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl new file mode 100644 index 0000000000..4e09b770ec --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// TODO: Implement lowering for dx::HitObject::MakeNop + +// CHECK-NOT: call + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl new file mode 100644 index 0000000000..fd2fbc5974 --- /dev/null +++ 
b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition +// CHECK-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final +// CHECK-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// CHECK-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit +// CHECK-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' +// CHECK-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' +// CHECK-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// CHECK-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit + +// CHECK: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop +// CHECK-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// CHECK-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static +// CHECK-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static +// CHECK-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// CHECK-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// CHECK-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl new file mode 100644 index 0000000000..8824cffaec --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl @@ -0,0 +1,13 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s + +// TODO: Implement lowering for dx::MaybeReorderThread + +// CHECK-NOT: call + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl new file mode 100644 index 0000000000..d570ef021f --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// CHECK: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// CHECK-NEXT: |-HLSLIntrinsicAttr {{[^ 
]+}} <> Implicit "op" "" 359 +// CHECK-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl new file mode 100644 index 0000000000..fa3ab68506 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl @@ -0,0 +1,10 @@ +// RUN: %dxc -T lib_6_9 -E main %s -verify + +// expected-no-diagnostics + +[shader("raygeneration")] void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl new file mode 100644 index 0000000000..3c97ea0a77 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl @@ -0,0 +1,62 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,closesthit,anyhit,miss) + : read(caller,closesthit,anyhit,miss); +}; + +struct Attribs { float2 barys; }; +void CallReorder() +{ +// expected-error@+6{{dx::MaybeReorderThread is unavailable in shader stage 'compute' (requires 'raygeneration')}} +// expected-error@+5{{dx::MaybeReorderThread is unavailable in shader stage 'callable' (requires 'raygeneration')}} +// expected-error@+4{{dx::MaybeReorderThread is unavailable in shader stage 'intersection' (requires 'raygeneration')}} +// expected-error@+3{{dx::MaybeReorderThread is unavailable in shader stage 'anyhit' (requires 'raygeneration')}} +// expected-error@+2{{dx::MaybeReorderThread is unavailable in shader stage 'closesthit' (requires 'raygeneration')}} +// expected-error@+1{{dx::MaybeReorderThread is unavailable in shader stage 'miss' (requires 'raygeneration')}} + dx::MaybeReorderThread(0,0); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainReorderCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { + CallReorder(); +} + +[shader("raygeneration")] +void mainReorderRG() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainReorderCALL(inout Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainReorderIS() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainReorderAH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("closesthit")] +void mainReorderCH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("miss")] +void mainReorderMS(inout Payload pld) { + CallReorder(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..db2d0fd2e3 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl @@ -0,0 +1,9 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// Check that 
dx::MaybeReorderThread is unavailable pre SM 6.9. + +[shader("raygeneration")] +void main() { + // expected-error@+1{{intrinsic dx::MaybeReorderThread potentially used by ''main'' requires shader model 6.9 or greater}} + dx::MaybeReorderThread(15u, 4u); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl new file mode 100644 index 0000000000..e23f398538 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// expected-no-diagnostics +using namespace dx; + +[shader("raygeneration")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl new file mode 100644 index 0000000000..44afcf47e7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl @@ -0,0 +1,58 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +dx::HitObject UseHitObject() { + return dx::HitObject::MakeNop(); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainHitCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { +// expected-error@-7{{dx::HitObject is unavailable in shader stage 'compute' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainHitCALL(inout Attribs attrs) { +// expected-error@-14{{dx::HitObject is unavailable in shader stage 'callable' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainHitIS() { +// expected-error@-21{{dx::HitObject is unavailable in shader stage 'intersection' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainHitAH(inout Payload pld, in Attribs attrs) { +// expected-error@-28{{dx::HitObject is unavailable in shader stage 'anyhit' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +[shader("raygeneration")] +void mainHitRG() { + UseHitObject(); +} + +[shader("closesthit")] +void mainHitCH(inout Payload pld, in Attribs attrs) { + UseHitObject(); +} + +[shader("miss")] +void mainHitMS(inout Payload pld) { + UseHitObject(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl new file mode 100644 index 0000000000..baa3a07a5b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl @@ -0,0 +1,4 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// expected-error@+1{{'dx::HitObject' is an object and cannot be used as a type parameter}} +RWStructuredBuffer InvalidBuffer; diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..59c8dfbe2f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T
lib_6_8 %s -verify + +// Check that the HitObject is unavailable pre SM 6.9. + +[shader("raygeneration")] +void main() { + // expected-error@+3{{intrinsic dx::HitObject::MakeNop potentially used by ''main'' requires shader model 6.9 or greater}} + // expected-error@+2{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + // expected-error@+1{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + dx::HitObject hit = dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl new file mode 100644 index 0000000000..4b6c45806b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 %s -verify + +// expected-note@+1{{entry function defined here}} +float main(RayDesc rayDesc: RAYDESC) : OUT { +// expected-error@+1{{dx::HitObject is unavailable in shader stage 'vertex' (requires 'raygeneration', 'closesthit' or 'miss')}} + dx::HitObject::MakeNop(); + return 0.f; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl new file mode 100644 index 0000000000..c266d81ddb --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// This test checks that HitObject can be used with 'using namespace dx' instead of explicit namespace prefix +// expected-no-diagnostics + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +using namespace dx; + +[shader("raygeneration")] +void main() +{ + HitObject hit; + MaybeReorderThread(hit); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // Create a HitObject + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // Also test using a static method + HitObject hit = HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl new file mode 100644 index 0000000000..cb7a24e1c7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +// Also test API methods +[shader("raygeneration")] +void main2() +{ + // expected-error@+1{{use of undeclared identifier 'HitObject'}} + HitObject::MakeNop(); +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl 
b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl new file mode 100644 index 0000000000..edf7e4fa71 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(1); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(2); +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(3); +} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 51ea6b3176..0ca5b0716b 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1089,6 +1089,17 @@ uint [[ro]] CommittedInstanceContributionToHitGroupIndex(); } namespace +// Shader Execution Reordering +namespace DxHitObjectMethods { + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeNop(); +} namespace + +namespace DxIntrinsics { +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject); +void [[min_sm=6.9]] MaybeReorderThread(in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject, in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +} namespace + // Work Graphs objects and methods // EmptyNodeInput diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 6f4611db32..0a9ab062a3 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8209,6 +8209,8 @@ def __init__( overload_idx, hidden, min_shader_model, + static_member, + class_prefix, ): self.name = name # Function name self.idx = idx # Unique number within namespace @@ -8217,14 +8219,27 @@ def __init__( self.ns = ns # Function namespace self.ns_idx = ns_idx # Namespace index self.doc = doc # Documentation - id_prefix = "IOP" if ns == "Intrinsics" else "MOP" + id_prefix = "IOP" if ns.endswith("Intrinsics") else "MOP" + + class_name = None + if ns.endswith("Methods"): + class_name = ns[0 : -len("Methods")] + # SPIR-V Change Starts if ns == "VkIntrinsics": name = "Vk" + name self.name = "Vk" + self.name id_prefix = "IOP" # SPIR-V Change Ends - self.enum_name = "%s_%s" % (id_prefix, name) # enum name + if ns.startswith("Dx"): + if not class_prefix: + name = "Dx" + name + self.name = name + + if class_prefix: + self.enum_name = "%s_%s_%s" % (id_prefix, class_name, name) + else: + self.enum_name = "%s_%s" % (id_prefix, name) self.readonly = ro # Only read memory self.readnone = rn # Not read memory self.argmemonly = amo # Only accesses memory through argument pointers @@ -8242,6 +8257,7 @@ def __init__( self.min_shader_model = (min_shader_model[0] << 4) | ( min_shader_model[1] & 0x0F ) + self.static_member = static_member # HLSL static member function self.key = ( ("%3d" % ns_idx) + "!" 
@@ -8355,6 +8371,7 @@ def __init__(self, intrinsic_defs, opcode_data): "AnyNodeOutputRecord": "LICOMPTYPE_ANY_NODE_OUTPUT_RECORD", "GroupNodeOutputRecords": "LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS", "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", + "DxHitObject": "LICOMPTYPE_HIT_OBJECT", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} @@ -8414,7 +8431,7 @@ def load_intrinsics(self, intrinsic_defs): r"""( sampler\w* | string | (?:RW)?(?:Texture\w*|ByteAddressBuffer) | - acceleration_struct | ray_desc | + acceleration_struct | ray_desc | RayQuery | DxHitObject | Node\w* | RWNode\w* | EmptyNode\w* | AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* $)""", @@ -8620,7 +8637,9 @@ def process_attr(attr): readonly = False # Only read memory readnone = False # Not read memory argmemonly = False # Only reads memory through pointer arguments + static_member = False # Static member function is_wave = False + class_prefix = False # Insert class name as enum_prefix # Is wave-sensitive unsigned_op = "" # Unsigned opcode if exist overload_param_index = ( @@ -8646,6 +8665,12 @@ def process_attr(attr): if a == "hidden": hidden = True continue + if a == "static": + static_member = True + continue + if a == "class_prefix": + class_prefix = True + continue assign = a.split("=") @@ -8689,6 +8714,8 @@ def process_attr(attr): overload_param_index, hidden, min_shader_model, + static_member, + class_prefix, ) current_namespace = None @@ -8737,6 +8764,8 @@ def process_attr(attr): overload_param_index, hidden, min_shader_model, + static_member, + class_prefix, ) = process_attr(attr) # Add an entry for this intrinsic. if bracket_cleanup_re.search(opts): @@ -8753,6 +8782,8 @@ def process_attr(attr): for in_arg in in_args: args.append(process_arg(in_arg, arg_idx, args, name)) arg_idx += 1 + if class_prefix: + assert current_namespace.endswith("Methods") # We have to process the return type description last # to match the compiler's handling of it and allow # the return type to match an input type. 
@@ -8776,6 +8807,8 @@ def process_attr(attr): overload_param_index, hidden, min_shader_model, + static_member, + class_prefix, ) ) num_entries += 1 diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 2a0359d274..4580e6c12c 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -620,6 +620,7 @@ def print_opfunc_table(self): "noderecordhandle": "A(pNodeRecordHandle);", "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", + "hit_object": "A(pHit);", } last_category = None for i in self.instrs: @@ -985,15 +986,11 @@ def get_hlsl_intrinsics(): last_ns = "" ns_table = "" is_vk_table = False # SPIRV Change - id_prefix = "" arg_idx = 0 opcode_namespace = db.opcode_namespace for i in sorted(db.intrinsics, key=lambda x: x.key): if last_ns != i.ns: last_ns = i.ns - id_prefix = ( - "IOP" if last_ns == "Intrinsics" or last_ns == "VkIntrinsics" else "MOP" - ) # SPIRV Change if len(ns_table): result += ns_table + "};\n" # SPIRV Change Starts @@ -1017,14 +1014,15 @@ def get_hlsl_intrinsics(): flags.append("INTRIN_FLAG_READ_NONE") if i.wave: flags.append("INTRIN_FLAG_IS_WAVE") + if i.static_member: + flags.append("INTRIN_FLAG_STATIC_MEMBER") if flags: flags = " | ".join(flags) else: flags = "0" - ns_table += " {(UINT)%s::%s_%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( + ns_table += " {(UINT)%s::%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( opcode_namespace, - id_prefix, - i.name, + i.enum_name, flags, i.min_shader_model, i.overload_param_index, diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index 48a0b74c17..4c85069488 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 358, + "Num_Intrinsics": 360, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -358,6 +358,8 @@ "IOP_umul": 354, "IOP_usign": 355, "MOP_InterlockedUMax": 356, - "MOP_InterlockedUMin": 357 + "MOP_InterlockedUMin": 357, + "MOP_DxHitObject_MakeNop": 358, + "IOP_DxMaybeReorderThread": 359 } } From 7269298ed01919ad7cb0592f51cdf896a5e3ee4a Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 25 Mar 2025 17:28:21 +0100 Subject: [PATCH 47/88] [SER] HitObject_MakeNop|Miss DXIL opcodes and verification tests (#7201) - DXIL opcodes for HitObject_MakeNop and HitObject_MakeMiss - DXV validation test --- include/dxc/DXIL/DxilConstants.h | 12 ++- include/dxc/DXIL/DxilInstructions.h | 74 +++++++++++++++++++ include/dxc/DXIL/DxilOperations.h | 2 + lib/DXIL/DxilOperations.cpp | 64 ++++++++++++---- .../ser_hitobject_make_passing.ll | 46 ++++++++++++ utils/hct/hctdb.py | 53 ++++++++++++- 6 files changed, 229 insertions(+), 22 deletions(-) create mode 100644 tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 54131f3948..0a9c6a4ffd 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -520,9 +520,7 @@ enum class OpCode : unsigned { ReservedB27 = 289, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved - ReservedB3 = 265, // reserved ReservedB30 = 292, // reserved - ReservedB4 = 266, // reserved ReservedB5 = 267, // reserved ReservedB6 = 268, // reserved ReservedB7 = 269, // reserved @@ -909,6 +907,10 @@ enum class OpCode : unsigned { WriteSamplerFeedbackLevel = 176, // updates a feedback texture for a sampling // operation with a mipmap-level offset + // 
Shader Execution Reordering + HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss + HitObject_MakeNop = 266, // Creates an empty nop HitObject + // Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands AtomicCompareExchange = 79, // atomic compare and exchange to memory @@ -1281,6 +1283,10 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackGrad, WriteSamplerFeedbackLevel, + // Shader Execution Reordering + HitObject_MakeMiss, + HitObject_MakeNop, + // Synchronization AtomicBinOp, AtomicCompareExchange, @@ -1345,7 +1351,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 175 // exclusive last value of enumeration + NumOpClasses = 177 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 11ab8e3b8d..6a28a2a806 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8813,5 +8813,79 @@ struct DxilInst_AllocateRayQuery2 { llvm::APInt(32, (uint64_t)val))); } }; + +/// This instruction Creates a new HitObject representing a miss +struct DxilInst_HitObject_MakeMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_MakeMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (11 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_RayFlags = 1, + arg_MissShaderIndex = 2, + arg_Origin_X = 3, + arg_Origin_Y = 4, + arg_Origin_Z = 5, + arg_TMin = 6, + arg_Direction_X = 7, + arg_Direction_Y = 8, + arg_Direction_Z = 9, + arg_TMax = 10, + }; + // Accessors + llvm::Value *get_RayFlags() const { return Instr->getOperand(1); } + void set_RayFlags(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_MissShaderIndex() const { return Instr->getOperand(2); } + void set_MissShaderIndex(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(3); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(4); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(5); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(6); } + void set_TMin(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(7); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(8); } + void set_Direction_Y(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(9); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(10); } + void set_TMax(llvm::Value *val) { Instr->setOperand(10, val); } +}; + +/// This instruction Creates an empty nop HitObject +struct DxilInst_HitObject_MakeNop { + llvm::Instruction *Instr; + 
// Construction and identification + DxilInst_HitObject_MakeNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (1 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 3514701327..e522e06204 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -64,6 +64,7 @@ class OP { void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } llvm::Type *GetHandleType() const; + llvm::Type *GetHitObjectType() const; llvm::Type *GetNodeHandleType() const; llvm::Type *GetNodeRecordHandleType() const; llvm::Type *GetResourcePropertiesType() const; @@ -146,6 +147,7 @@ class OP { llvm::Module *m_pModule; llvm::Type *m_pHandleType; + llvm::Type *m_pHitObjectType; llvm::Type *m_pNodeHandleType; llvm::Type *m_pNodeRecordHandleType; llvm::Type *m_pResourcePropertiesType; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b3e552da18..86049fee9c 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2670,24 +2670,29 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { false}, Attribute::None, }, + + // Shader Execution Reordering void, h, f, d, i1, i8, i16, + // i32, i64, udt, obj , function attribute { - OC::ReservedB3, - "ReservedB3", - OCC::Reserved, - "reserved", + OC::HitObject_MakeMiss, + "HitObject_MakeMiss", + OCC::HitObject_MakeMiss, + "hitObject_MakeMiss", {true, false, false, false, false, false, false, false, false, false, false}, - Attribute::None, + Attribute::ReadNone, }, { - OC::ReservedB4, - "ReservedB4", - OCC::Reserved, - "reserved", + OC::HitObject_MakeNop, + "HitObject_MakeNop", + OCC::HitObject_MakeNop, + "hitObject_MakeNop", {true, false, false, false, false, false, false, false, false, false, false}, - Attribute::None, + Attribute::ReadNone, }, + + // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute { OC::ReservedB5, "ReservedB5", @@ -3750,6 +3755,14 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } + // Instructions: HitObject_MakeMiss=265, HitObject_MakeNop=266 + if ((265 <= op && op <= 266)) { + major = 6; + minor = 9; + mask = + SFLAG(Library) | SFLAG(RayGeneration) | SFLAG(ClosestHit) | SFLAG(Miss); + return; + } // OPCODE-SMMASK:END } @@ -3851,6 +3864,8 @@ OP::OP(LLVMContext &Ctx, Module *pModule) m_pHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.Handle", pModule); + m_pHitObjectType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), + "dx.types.HitObject", pModule); m_pNodeHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.NodeHandle", pModule); m_pNodeRecordHandleType = GetOrCreateStructType( @@ -3993,6 +4008,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { Type *pF64 = Type::getDoubleTy(m_Ctx); Type *pSDT = GetSplitDoubleType(); // Split double type. Type *p4I32 = GetFourI32Type(); // 4 i32s in a struct. 
+ Type *pHit = GetHitObjectType(); Type *udt = pOverloadType; Type *obj = pOverloadType; @@ -5871,14 +5887,28 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB3: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_MakeMiss: + A(pHit); + A(pI32); + A(pI32); A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); break; - case OpCode::ReservedB4: - A(pV); + case OpCode::HitObject_MakeNop: + A(pHit); A(pI32); break; + + // case OpCode::ReservedB5: A(pV); A(pI32); @@ -6288,8 +6318,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ReservedB0: case OpCode::ReservedB1: case OpCode::ReservedB2: - case OpCode::ReservedB3: - case OpCode::ReservedB4: + case OpCode::HitObject_MakeMiss: + case OpCode::HitObject_MakeNop: case OpCode::ReservedB5: case OpCode::ReservedB6: case OpCode::ReservedB7: @@ -6431,6 +6461,8 @@ Type *OP::GetHandleType() const { return m_pHandleType; } Type *OP::GetNodeHandleType() const { return m_pNodeHandleType; } +Type *OP::GetHitObjectType() const { return m_pHitObjectType; } + Type *OP::GetNodeRecordHandleType() const { return m_pNodeRecordHandleType; } Type *OP::GetResourcePropertiesType() const { diff --git a/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll new file mode 100644 index 0000000000..88b71ff3e0 --- /dev/null +++ b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll @@ -0,0 +1,46 @@ +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + ; Test HitObject_MakeMiss (opcode 265) + %r265 = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + + ; Test HitObject_MakeNop (opcode 266) + %r266 = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32, i32, i32, float, float, float, float, float, float, float, float) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!9 = !{null, !"", null, null, !10} +!10 = !{i32 0, i64 0} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 0a9ab062a3..fc4c427580 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -699,6 +699,15 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Extended Command Information" self.name_idx[i].shader_stages = ("vertex",) 
self.name_idx[i].shader_model = 6, 8 + for i in ("HitObject_MakeMiss,HitObject_MakeNop").split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + "closesthit", + "miss", + ) def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. @@ -5550,7 +5559,43 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 31) + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3) + + self.add_dxil_op( + "HitObject_MakeMiss", + next_op_idx, + "HitObject_MakeMiss", + "Creates a new HitObject representing a miss", + "v", + "rn", + [ + db_dxil_param(0, "hit_object", "", "HitObject with a committed miss"), + db_dxil_param(2, "i32", "RayFlags", "ray flags"), + db_dxil_param(3, "i32", "MissShaderIndex", "Miss shader index"), + db_dxil_param(4, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(5, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(6, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(7, "f", "TMin", "Tmin of the ray"), + db_dxil_param(8, "f", "Direction_X", "Direction x of the ray"), + db_dxil_param(9, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(10, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(11, "f", "TMax", "Tmax of the ray"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_MakeNop", + next_op_idx, + "HitObject_MakeNop", + "Creates an empty nop HitObject", + "v", + "rn", + [db_dxil_param(0, "hit_object", "", "Empty nop HitObject")], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 26, 5) # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) @@ -8145,10 +8190,12 @@ def add_dxil_op_reserved(self, name, code_id): ) self.instr.append(i) - def reserve_dxil_op_range(self, group_name, start_id, count): + def reserve_dxil_op_range(self, group_name, start_id, count, start_reserved_id=0): "Reserve a range of dxil opcodes for future use; returns next id" for i in range(0, count): - self.add_dxil_op_reserved("{0}{1}".format(group_name, i), start_id + i) + self.add_dxil_op_reserved( + "{0}{1}".format(group_name, start_reserved_id + i), start_id + i + ) return start_id + count def get_instr_by_llvm_name(self, llvm_name): From 33bc44a3d370a1f3f835079dc5049e9989b79d89 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 25 Mar 2025 10:20:31 -0700 Subject: [PATCH 48/88] Update github actions versions to enable coverage (#7183) Coverage generation has failed because of the deprecation of versions of upload-artifact before v4 which the version of upload-pages-artifact that DXC used made use of. This bumps that and all other actions to the latest versions. 
--- .github/workflows/coverage-gh-pages.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/coverage-gh-pages.yml b/.github/workflows/coverage-gh-pages.yml index 4c7b2c2018..07e63584e3 100644 --- a/.github/workflows/coverage-gh-pages.yml +++ b/.github/workflows/coverage-gh-pages.yml @@ -26,11 +26,11 @@ jobs: timeout-minutes: 240 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: true - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@v5 - name: Install dependencies run: sudo apt install -y ninja-build - name: Configure @@ -44,7 +44,7 @@ - name: Force artifact permissions run: chmod -c -R +rX ${{github.workspace}}/build/report - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v3 with: path: ${{github.workspace}}/build/report @@ -60,4 +60,4 @@ steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 From 1eb83c777f8efc8e761f6cd83e52f6cb879deaac Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 26 Mar 2025 04:19:06 -0700 Subject: [PATCH 49/88] Allow native vectors for LLVM operations (#7155) Disables various forms of scalarization and vector elimination to permit vectors to pass through to final DXIL when used in native LLVM operations and loading/storing. Introduces a few vector manipulation LLVM instructions to DXIL allowing for them to appear in output DXIL. Skips passes for 6.9 that scalarize, convert to arrays, or otherwise eliminate vectors. This eliminates the element-by-element extraction, application, and reconstitution of the vectors to operators. In many cases, this required plumbing the shader model information to passes that didn't have it before and also the recreation of dxil version information from metadata where necessary. Many changes were needed for the MatrixBitcastLower pass related to linking to avoid converting matrix vectors, but also to perform the conversion if a shader was compiled for 6.9+, but then linked to an earlier target. This now adapts to the linker target to either preserve vectors for 6.9 or arrays for previous versions. This requires running the DynamicIndexing VectorToArray pass during linking since 6_x and 6_9+ will fail to run this in the initial compile, but will still need to lower vectors to arrays. This required making the pass particularly robust to different sources of version information as compiling, linking, and running optimization in isolation each require retrieval from a different source. The latter two sources are facilitated with a dxilutil function. Ternary conditional/select operators were element extracted in codegen. Removing this allows 6.9 to preserve the vectors, but also maintains behavior for previous shader models because the operations get scalarized later anyway. This was in the region of work to allow short circuiting, but the effect of that is to introduce the select and skip the later code that implements short circuiting for supported cases. Tests confirm that no short circuiting is introduced for native vectors. Adds extensive tests for these operations using different types and sizes and testing them appropriately. Booleans produce significantly different code, so they get their own test. Vec1s have some special treatment as they are not allowed in final dxil, so they still need to be scalarized. This requires value specific conditionals in transformation passes.
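As a rough illustration of that distinction (a hypothetical sketch, not part of this change; the function names and the width of 8 are arbitrary), consider two exported functions compiled with -T lib_6_9: the long-vector addition is expected to survive as a single native vector add in the final DXIL, while the one-element vector is still scalarized because final DXIL does not allow vec1s.

    // Hypothetical example, not taken from this change.
    export vector<float, 8> add8(vector<float, 8> a, vector<float, 8> b) {
      return a + b; // expected to remain one 8-wide vector add under SM 6.9
    }
    export vector<float, 1> add1(vector<float, 1> a, vector<float, 1> b) {
      return a + b; // vec1 is still lowered to a scalar add
    }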
Testing confirms that this is done. Fixes #7123 --- include/dxc/DXIL/DxilInstructions.h | 36 + include/dxc/DXIL/DxilMetadataHelper.h | 2 + include/dxc/DXIL/DxilUtil.h | 4 + lib/DXIL/DxilMetadataHelper.cpp | 23 +- lib/DXIL/DxilUtil.cpp | 13 + lib/DxilValidation/DxilValidation.cpp | 23 +- lib/HLSL/DxilLinker.cpp | 6 + lib/HLSL/HLMatrixBitcastLowerPass.cpp | 60 +- lib/HLSL/HLModule.cpp | 3 + lib/Transforms/Scalar/LowerTypePasses.cpp | 40 +- .../Scalar/ScalarReplAggregatesHLSL.cpp | 56 +- lib/Transforms/Scalar/Scalarizer.cpp | 43 +- tools/clang/lib/CodeGen/CGExprScalar.cpp | 15 +- tools/clang/lib/Sema/SemaHLSL.cpp | 8 +- .../hlsl/types/longvec-operators-bool.hlsl | 464 +++++++++++ .../hlsl/types/longvec-operators-int.hlsl | 73 ++ .../hlsl/types/longvec-operators-scalars.hlsl | 342 ++++++++ .../types/longvec-operators-shortcircuit.hlsl | 57 ++ .../hlsl/types/longvec-operators-vec1s.hlsl | 479 +++++++++++ .../hlsl/types/longvec-operators.hlsl | 581 ++++++++++++++ .../passes/longvec-alloca-gv-dynvec2array.ll | 269 +++++++ .../passes/longvec-alloca-gv-sroa.ll | 324 ++++++++ .../CodeGenDXIL/passes/longvec-alloca-gv.hlsl | 112 +++ .../passes/longvec-operators-scalarizer.ll | 660 ++++++++++++++++ .../longvec-operators-vec1-scalarizer.ll | 745 ++++++++++++++++++ .../passes/longvec-operators-vec1.hlsl | 425 ++++++++++ .../CodeGenDXIL/passes/longvec-operators.hlsl | 420 ++++++++++ .../passes/dxil/lower_type/vec_array_param.ll | 1 - tools/clang/unittests/HLSL/LinkerTest.cpp | 5 + utils/hct/hctdb.py | 31 + 30 files changed, 5242 insertions(+), 78 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 6a28a2a806..f8d9ae77f3 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -645,6 +645,42 @@ struct LlvmInst_VAArg { bool isAllowed() const { return false; } }; +/// This instruction extracts from vector +struct LlvmInst_ExtractElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ExtractElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::ExtractElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction inserts into vector +struct LlvmInst_InsertElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_InsertElement(llvm::Instruction 
*pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::InsertElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction Shuffle two vectors +struct LlvmInst_ShuffleVector { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ShuffleVector(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::ShuffleVector; + } + // Validation support + bool isAllowed() const { return true; } +}; + /// This instruction extracts from aggregate struct LlvmInst_ExtractValue { llvm::Instruction *Instr; diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h index fa13f6d766..9df155e6e7 100644 --- a/include/dxc/DXIL/DxilMetadataHelper.h +++ b/include/dxc/DXIL/DxilMetadataHelper.h @@ -427,6 +427,8 @@ class DxilMDHelper { // Dxil version. void EmitDxilVersion(unsigned Major, unsigned Minor); void LoadDxilVersion(unsigned &Major, unsigned &Minor); + static bool LoadDxilVersion(const llvm::Module *pModule, unsigned &Major, + unsigned &Minor); // Validator version. void EmitValidatorVersion(unsigned Major, unsigned Minor); diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index 5652c56f50..ca8f2ac755 100644 --- a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -223,6 +223,10 @@ bool DeleteDeadAllocas(llvm::Function &F); llvm::Value *GEPIdxToOffset(llvm::GetElementPtrInst *GEP, llvm::IRBuilder<> &Builder, hlsl::OP *OP, const llvm::DataLayout &DL); + +// Passes back Dxil version of the given module on true return. +bool LoadDxilVersion(const llvm::Module *M, unsigned &Major, unsigned &Minor); + } // namespace dxilutil } // namespace hlsl diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp index fdd6d6b946..19d199ee29 100644 --- a/lib/DXIL/DxilMetadataHelper.cpp +++ b/lib/DXIL/DxilMetadataHelper.cpp @@ -177,17 +177,28 @@ void DxilMDHelper::EmitDxilVersion(unsigned Major, unsigned Minor) { pDxilVersionMD->addOperand(MDNode::get(m_Ctx, MDVals)); } -void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { - NamedMDNode *pDxilVersionMD = m_pModule->getNamedMetadata(kDxilVersionMDName); - IFTBOOL(pDxilVersionMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA); - IFTBOOL(pDxilVersionMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA); +// Load dxil version from metadata contained in pModule. +// Returns true and passes result through +// the dxil major/minor version params if valid. +// Returns false if metadata is missing or invalid. 
+bool DxilMDHelper::LoadDxilVersion(const Module *pModule, unsigned &Major, + unsigned &Minor) { + NamedMDNode *pDxilVersionMD = pModule->getNamedMetadata(kDxilVersionMDName); + IFRBOOL(pDxilVersionMD != nullptr, false); + IFRBOOL(pDxilVersionMD->getNumOperands() == 1, false); MDNode *pVersionMD = pDxilVersionMD->getOperand(0); - IFTBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, - DXC_E_INCORRECT_DXIL_METADATA); + IFRBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, false); Major = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMajorIdx)); Minor = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMinorIdx)); + + return true; +} + +void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { + IFTBOOL(LoadDxilVersion(m_pModule, Major, Minor), + DXC_E_INCORRECT_DXIL_METADATA); } // diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 0a4fb1160a..966c2e189c 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -1415,5 +1415,18 @@ bool DeleteDeadAllocas(llvm::Function &F) { return Changed; } +// Retrieve dxil version in the given module. +// Where the module doesn't already have a Dxil module, +// it identifies and returns the version info from the metatdata. +// Returns false where none of that works, but that shouldn't happen much. +bool LoadDxilVersion(const Module *M, unsigned &Major, unsigned &Minor) { + if (M->HasDxilModule()) { + M->GetDxilModule().GetShaderModel()->GetDxilVersion(Major, Minor); + return true; + } + // No module, try metadata. + return DxilMDHelper::LoadDxilVersion(M, Major, Minor); +} + } // namespace dxilutil } // namespace hlsl diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 0a2001a745..4622256dfe 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2193,6 +2193,9 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, return true; if (Ty->isVectorTy()) { + if (Ty->getVectorNumElements() > 1 && + ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoVector); return false; } @@ -2669,6 +2672,23 @@ static bool IsLLVMInstructionAllowedForLib(Instruction &I, } } +// Shader model specific checks for valid LLVM instructions. +// Currently only checks for pre 6.9 usage of vector operations. +// Returns false if shader model is pre 6.9 and I represents a vector +// operation. Returns true otherwise. +static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, + ValidationContext &ValCtx) { + if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; + unsigned OpCode = I.getOpcode(); + if (OpCode == Instruction::InsertElement || + OpCode == Instruction::ExtractElement || + OpCode == Instruction::ShuffleVector) + return false; + + return true; +} + static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { bool SupportsMinPrecision = ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; @@ -2691,7 +2711,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } // Instructions must be allowed. 
- if (!IsLLVMInstructionAllowed(I)) { + if (!IsLLVMInstructionAllowed(I) || + !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) { ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed); continue; diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index 68c83fc037..ca343662ab 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1255,6 +1255,12 @@ void DxilLinkJob::RunPreparePass(Module &M) { // For static global handle. PM.add(createLowerStaticGlobalIntoAlloca()); + // Change dynamic indexing vector to array where vectors aren't + // supported, but might be there from the initial compile. + if (!pSM->IsSM69Plus()) + PM.add( + createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */)); + // Remove MultiDimArray from function call arg. PM.add(createMultiDimArrayToOneDimArrayPass()); diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp index 93ba3b9816..db20d8a324 100644 --- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp +++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp @@ -76,6 +76,7 @@ Type *TryLowerMatTy(Type *Ty) { } class MatrixBitcastLowerPass : public FunctionPass { + bool SupportsVectors = false; public: static char ID; // Pass identification, replacement for typeid @@ -83,6 +84,9 @@ class MatrixBitcastLowerPass : public FunctionPass { StringRef getPassName() const override { return "Matrix Bitcast lower"; } bool runOnFunction(Function &F) override { + DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); + SupportsVectors = DM.GetShaderModel()->IsSM69Plus(); + bool bUpdated = false; std::unordered_set matCastSet; for (auto blkIt = F.begin(); blkIt != F.end(); ++blkIt) { @@ -100,7 +104,6 @@ class MatrixBitcastLowerPass : public FunctionPass { } } - DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); // Remove bitcast which has CallInst user. 
if (DM.GetShaderModel()->IsLib()) { for (auto it = matCastSet.begin(); it != matCastSet.end();) { @@ -185,7 +188,7 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { User *U = *(it++); if (GetElementPtrInst *GEP = dyn_cast(U)) { Type *EltTy = GEP->getType()->getPointerElementType(); - if (HLMatrixType::isa(EltTy)) { + if (HLMatrixType MatTy = HLMatrixType::dyn_cast(EltTy)) { // Change gep matrixArray, 0, index // into // gep oneDimArray, 0, index * matSize @@ -193,10 +196,11 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { SmallVector idxList(GEP->idx_begin(), GEP->idx_end()); DXASSERT(idxList.size() == 2, "else not one dim matrix array index to matrix"); - - HLMatrixType MatTy = HLMatrixType::cast(EltTy); - Value *matSize = Builder.getInt32(MatTy.getNumElements()); - idxList.back() = Builder.CreateMul(idxList.back(), matSize); + unsigned NumElts = MatTy.getNumElements(); + if (!SupportsVectors || NumElts == 1) { + Value *MatSize = Builder.getInt32(NumElts); + idxList.back() = Builder.CreateMul(idxList.back(), MatSize); + } Value *NewGEP = Builder.CreateGEP(A, idxList); lowerMatrix(GEP, NewGEP); DXASSERT(GEP->user_empty(), "else lower matrix fail"); @@ -211,13 +215,23 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { } else if (LoadInst *LI = dyn_cast(U)) { if (VectorType *Ty = dyn_cast(LI->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - Value *NewVec = UndefValue::get(LI->getType()); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateLoad(GEP); - NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + Value *NewVec = nullptr; + unsigned VecSize = Ty->getVectorNumElements(); + if (SupportsVectors && VecSize > 1) { + // Create a replacement load using the vector pointer. + Instruction *NewLd = LI->clone(); + unsigned VecIdx = NewLd->getNumOperands() - 1; + NewLd->setOperand(VecIdx, A); + Builder.Insert(NewLd); + NewVec = NewLd; + } else { + Value *zeroIdx = Builder.getInt32(0); + NewVec = UndefValue::get(LI->getType()); + for (unsigned i = 0; i < VecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateLoad(GEP); + NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + } } LI->replaceAllUsesWith(NewVec); LI->eraseFromParent(); @@ -228,12 +242,20 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { Value *V = ST->getValueOperand(); if (VectorType *Ty = dyn_cast(V->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateExtractElement(V, i); - Builder.CreateStore(Elt, GEP); + if (SupportsVectors && Ty->getVectorNumElements() > 1) { + // Create a replacement store using the vector pointer. 
+ Instruction *NewSt = ST->clone(); + unsigned VecIdx = NewSt->getNumOperands() - 1; + NewSt->setOperand(VecIdx, A); + Builder.Insert(NewSt); + } else { + Value *zeroIdx = Builder.getInt32(0); + unsigned vecSize = Ty->getNumElements(); + for (unsigned i = 0; i < vecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateExtractElement(V, i); + Builder.CreateStore(Elt, GEP); + } } ST->eraseFromParent(); } else { diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index 037885c9d8..a67877ef3e 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -604,6 +604,9 @@ MDTuple *HLModule::EmitHLResources() { void HLModule::LoadHLResources(const llvm::MDOperand &MDO) { const llvm::MDTuple *pSRVs, *pUAVs, *pCBuffers, *pSamplers; + // No resources. Nothing to do. + if (MDO.get() == nullptr) + return; m_pMDHelper->GetDxilResources(MDO, pSRVs, pUAVs, pCBuffers, pSamplers); // Load SRV records. diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp index feeb23a5da..d2438c7e22 100644 --- a/lib/Transforms/Scalar/LowerTypePasses.cpp +++ b/lib/Transforms/Scalar/LowerTypePasses.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLModule.h" @@ -180,10 +181,12 @@ bool LowerTypePass::runOnModule(Module &M) { namespace { class DynamicIndexingVectorToArray : public LowerTypePass { bool ReplaceAllVectors; + bool SupportsVectors; public: explicit DynamicIndexingVectorToArray(bool ReplaceAll = false) - : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {} + : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll), + SupportsVectors(false) {} static char ID; // Pass identification, replacement for typeid void applyOptions(PassOptions O) override; void dumpConfig(raw_ostream &OS) override; @@ -194,6 +197,7 @@ class DynamicIndexingVectorToArray : public LowerTypePass { Type *lowerType(Type *Ty) override; Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override; StringRef getGlobalPrefix() override { return ".v"; } + void initialize(Module &M) override; private: bool HasVectorDynamicIndexing(Value *V); @@ -207,6 +211,18 @@ class DynamicIndexingVectorToArray : public LowerTypePass { void ReplaceAddrSpaceCast(ConstantExpr *CE, Value *A, IRBuilder<> &Builder); }; +void DynamicIndexingVectorToArray::initialize(Module &M) { + // Set vector support according to available Dxil version. + // Use HLModule or metadata for version info. + // Otherwise retrieve from dxil module or metadata. + unsigned Major = 0, Minor = 0; + if (M.HasHLModule()) + M.GetHLModule().GetShaderModel()->GetDxilVersion(Major, Minor); + else + dxilutil::LoadDxilVersion(&M, Major, Minor); + SupportsVectors = (Major == 1 && Minor >= 9); +} + void DynamicIndexingVectorToArray::applyOptions(PassOptions O) { GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors, ReplaceAllVectors); @@ -306,9 +322,21 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) { } bool DynamicIndexingVectorToArray::needToLower(Value *V) { + bool MustReplaceVector = ReplaceAllVectors; Type *Ty = V->getType()->getPointerElementType(); - if (dyn_cast(Ty)) { - if (isa(V) || ReplaceAllVectors) { + + if (ArrayType *AT = dyn_cast(Ty)) { + // Array must be replaced even without dynamic indexing to remove vector + // type in dxil. 
+ MustReplaceVector = true; + Ty = dxilutil::GetArrayEltTy(AT); + } + + if (isa(Ty)) { + // Only needed for 2+ vectors where native vectors unsupported. + if (SupportsVectors && Ty->getVectorNumElements() > 1) + return false; + if (isa(V) || MustReplaceVector) { return true; } // Don't lower local vector which only static indexing. @@ -319,12 +347,6 @@ bool DynamicIndexingVectorToArray::needToLower(Value *V) { ReplaceStaticIndexingOnVector(V); return false; } - } else if (ArrayType *AT = dyn_cast(Ty)) { - // Array must be replaced even without dynamic indexing to remove vector - // type in dxil. - // TODO: optimize static array index in later pass. - Type *EltTy = dxilutil::GetArrayEltTy(AT); - return isa(EltTy); } return false; } diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 0c3e13f608..ec17fce9c8 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -81,16 +81,18 @@ class SROA_Helper { static bool DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT); - static bool - DoScalarReplacement(GlobalVariable *GV, std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, bool hasPrecise, - DxilTypeSystem &typeSys, const DataLayout &DL, - SmallVector &DeadInsts, DominatorTree *DT); + static bool DoScalarReplacement(GlobalVariable *GV, + std::vector &Elts, + IRBuilder<> &Builder, bool bFlatVector, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, + DominatorTree *DT); static unsigned GetEltAlign(unsigned ValueAlign, const DataLayout &DL, Type *EltTy, unsigned Offset); // Lower memcpy related to V. @@ -1714,6 +1716,7 @@ bool isGroupShareOrConstStaticArray(GlobalVariable *GV) { bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { Module &M = *HLM.GetModule(); + bool SupportsVectors = HLM.GetShaderModel()->IsSM69Plus(); DxilTypeSystem &typeSys = HLM.GetTypeSystem(); const DataLayout &DL = M.getDataLayout(); @@ -1878,7 +1881,8 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { uint64_t NumInstances = 1; bool SROAed = SROA_Helper::DoScalarReplacement( AI, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts, &DT); + /*bFlatVector*/ true, SupportsVectors, hasPrecise, typeSys, DL, + DeadInsts, &DT); if (SROAed) { Type *Ty = AI->getAllocatedType(); @@ -1945,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { continue; } - // Flat Global vector if no dynamic vector indexing. + // Flatten global vector if it has no dynamic vector indexing. bool bFlatVector = !hasDynamicVectorIndexing(GV); if (bFlatVector) { @@ -1981,7 +1985,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { // SROA_Parameter_HLSL has no access to a domtree, if one is needed, // it'll be generated SROAed = SROA_Helper::DoScalarReplacement( - GV, Elts, Builder, bFlatVector, + GV, Elts, Builder, bFlatVector, SupportsVectors, // TODO: set precise. 
/*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr); } @@ -2920,7 +2924,8 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy, bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT) { @@ -3033,6 +3038,10 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. + if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -3114,13 +3123,11 @@ unsigned SROA_Helper::GetEltAlign(unsigned ValueAlign, const DataLayout &DL, /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new /// AllocaInsts into Elts. Then do SROA on V. -bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, - std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, - SmallVector &DeadInsts, - DominatorTree *DT) { +bool SROA_Helper::DoScalarReplacement( + GlobalVariable *GV, std::vector &Elts, IRBuilder<> &Builder, + bool bFlatVector, bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, DominatorTree *DT) { DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n'); Type *Ty = GV->getType(); // Skip none pointer types. @@ -3134,6 +3141,9 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, // Skip basic types. if (Ty->isSingleValueType() && !Ty->isVectorTy()) return false; + // Skip vector where supported if it has more than 1 element. + if (Ty->isVectorTy() && SupportsVectors && Ty->getVectorNumElements() > 1) + return false; // Skip matrix types. if (HLMatrixType::isa(Ty)) return false; @@ -3240,6 +3250,10 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. 
+ if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -5277,6 +5291,8 @@ void SROA_Parameter_HLSL::flattenArgument( std::vector &FlatAnnotationList, BasicBlock *EntryBlock, ArrayRef DDIs) { std::deque WorkList; + bool SupportsVectors = m_pHLModule->GetShaderModel()->IsSM69Plus(); + WorkList.push_back({Arg, paramAnnotation}); unsigned startArgIndex = FlatAnnotationList.size(); @@ -5351,8 +5367,8 @@ void SROA_Parameter_HLSL::flattenArgument( // DomTree isn't used by arguments SROAed = SROA_Helper::DoScalarReplacement( V, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ false, annotation.IsPrecise(), dxilTypeSys, DL, - DeadInsts, /*DT*/ nullptr); + /*bFlatVector*/ false, SupportsVectors, annotation.IsPrecise(), + dxilTypeSys, DL, DeadInsts, /*DT*/ nullptr); } if (SROAed) { diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 729771c7c7..730354af99 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,6 +14,9 @@ // //===----------------------------------------------------------------------===// +#include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilUtil.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -151,6 +154,7 @@ class Scalarizer : public FunctionPass, // HLSL Change Begin bool AllowFolding = false; + bool SupportsVectors = false; Scalarizer(bool AllowFolding) : FunctionPass(ID), AllowFolding(AllowFolding) { @@ -290,6 +294,13 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { + // HLSL Change start - set SupportsVectors + const Module *M = F.getParent(); + unsigned Major = 0, Minor = 0; + if (hlsl::dxilutil::LoadDxilVersion(M, Major, Minor)) + SupportsVectors = (Major == 1 && Minor >= 9); + // HLSL Change end - set SupportsVectors + for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { @@ -436,7 +447,8 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, template bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { VectorType *VT = dyn_cast(I.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -457,7 +469,8 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { bool Scalarizer::visitSelectInst(SelectInst &SI) { VectorType *VT = dyn_cast(SI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -500,7 +513,8 @@ bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { VectorType *VT = dyn_cast(GEPI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; IRBuilder<> Builder(GEPI.getParent(), &GEPI); @@ -534,7 +548,8 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { bool Scalarizer::visitCastInst(CastInst &CI) { VectorType *VT = dyn_cast(CI.getDestTy()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. 
+ if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -559,6 +574,12 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { unsigned DstNumElems = DstVT->getNumElements(); unsigned SrcNumElems = SrcVT->getNumElements(); + + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && (DstNumElems > 1 || SrcNumElems > 1)) + return false; + // HLSL Change End - allow > 1 vectors where supported. + IRBuilder<> Builder(BCI.getParent(), &BCI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); @@ -609,7 +630,8 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { VectorType *VT = dyn_cast(SVI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -643,7 +665,8 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { bool Scalarizer::visitPHINode(PHINode &PHI) { VectorType *VT = dyn_cast(PHI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -679,6 +702,10 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. IRBuilder<> Builder(LI.getParent(), &LI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); @@ -705,6 +732,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. 
IRBuilder<> Builder(SI.getParent(), &SI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp index 0cb993e6f4..530c791fcc 100644 --- a/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -3713,20 +3713,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); - if (llvm::VectorType *VT = dyn_cast(CondV->getType())) { - llvm::VectorType *ResultVT = cast(LHS->getType()); - llvm::Value *result = llvm::UndefValue::get(ResultVT); - for (unsigned i = 0; i < VT->getNumElements(); i++) { - llvm::Value *EltCond = Builder.CreateExtractElement(CondV, i); - llvm::Value *EltL = Builder.CreateExtractElement(LHS, i); - llvm::Value *EltR = Builder.CreateExtractElement(RHS, i); - llvm::Value *EltSelect = Builder.CreateSelect(EltCond, EltL, EltR); - result = Builder.CreateInsertElement(result, EltSelect, i); - } - return result; - } else { - return Builder.CreateSelect(CondV, LHS, RHS); - } + return Builder.CreateSelect(CondV, LHS, RHS); } if (hlsl::IsHLSLMatType(E->getType())) { llvm::Value *Cond = CGF.EmitScalarExpr(condExpr); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 40010b1596..243471bc55 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6892,6 +6892,9 @@ bool HLSLExternalSource::MatchArguments( } } + std::string profile = m_sema->getLangOpts().HLSLProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName(profile.c_str()); + // Populate argTypes. for (size_t i = 0; i <= Args.size(); i++) { const HLSL_INTRINSIC_ARGUMENT *pArgument = &pIntrinsic->pArgs[i]; @@ -7062,8 +7065,9 @@ bool HLSLExternalSource::MatchArguments( } // Verify that the final results are in bounds. - CAB(uCols > 0 && uCols <= MaxVectorSize && uRows > 0 && - uRows <= MaxVectorSize, + CAB((uCols > 0 && uRows > 0 && + ((uCols <= MaxVectorSize && uRows <= MaxVectorSize) || + (SM->IsSM69Plus() && uRows == 1))), i); // Const diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl new file mode 100644 index 0000000000..12955c87f9 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl @@ -0,0 +1,464 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=2 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=5 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=3 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=9 %s | FileCheck %s + +// Test relevant operators on an assortment bool vector sizes with 6.9 native vectors. +// Bools have a different representation in memory and a smaller set of interesting ops. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] +RWStructuredBuffer< bool > buf; + +groupshared vector gs_vec1, gs_vec2; +groupshared vector gs_vec3; + + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], bool scales[10]) { + + // Another trick to capture the size. + // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.i32 [[res]], 0 + // CHECK: [[bscl:%[0-9]*]] = icmp ne i32 [[scl]], 0 + bool scalar = buf.Load(NUM); + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add9]] + // CHECK: [[bvec9:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec9]], zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + things[0] = things[9]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i1> undef, i1 [[bscl]], i32 0 + // CHECK: [[res:%[0-9]*]] = shufflevector <[[NUM]] x i1> [[spt]], <[[NUM]] x i1> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[res]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + things[5] = scalar; + +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[svec0:%[0-9]*]] = sext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[bsvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[svec0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bsvec0]] to <[[NUM]] x i32> + res[0] = -things[0]; + + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x 
i32> + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + res[1] = +things[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[res2:%[0-9]*]] = add nuw nsw <[[NUM]] x i32> [[vec2]], [[vec1]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[1] + things[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = sub nsw <[[NUM]] x i32> [[vec2]], [[vec3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[2] - things[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = mul nuw nsw <[[NUM]] x i32> [[vec4]], [[vec3]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = things[3] * things[4]; + + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec4]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: [[res5:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = things[4] / things[5]; + + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = {{[ufs]?rem( fast)?}} <[[NUM]] x i32> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; + + // Stores into res[]. Previous were for things[] inout. 
+ // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. +// CHECK-LABEL: define void @"\01?scarithmetic +export vector scarithmetic(inout vector things[10], bool scales[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load i32, i32* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = add <[[NUM]] x i32> [[spt0]], [[vec0]] + // CHECK: [[bres0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = 
zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32> + res[0] = things[0] + scales[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load i32, i32* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = sub <[[NUM]] x i32> [[vec1]], [[spt1]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = things[1] - scales[1]; + + + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load i32, i32* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt2]], [[vec2]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[2] * scales[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load i32, i32* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec3]], [[spt3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load i32, i32* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = add <[[NUM]] x i32> [[spt4]], [[vec4]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load i32, i32* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = 
shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sub <[[NUM]] x i32> [[spt5]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load i32, i32* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt6]], [[vec6]] + // CHECK: [[bres6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res6]], zeroinitializer + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. 
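+
+// Note: with -HV 2018 these logical operators apply elementwise to whole long
+// vectors; the checks below expect plain vector 'or'/'and' IR on the compared
+// i1 vectors with no short-circuiting branches (longvec-operators-shortcircuit.hlsl
+// later in this patch covers that behavior explicitly). A minimal illustrative
+// sketch, kept in comments so it is not compiled as part of this test; the
+// names and the element count 7 are assumptions, not taken from this file:
+//   vector<bool, 7> a, b;
+//   vector<bool, 7> both   = a && b;  // expected to lower to an elementwise 'and'
+//   vector<bool, 7> either = a || b;  // expected to lower to an elementwise 'or'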
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector logic(vector truth[10], vector consequences[10])[10] {
+  vector res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32>
+  res[0] = !truth[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]]
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]]
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // Further IR for the ternary operator is not checked here.
+
+  res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[bvec0]], [[bvec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = icmp {{u?}}ne <[[NUM]] x i1> [[bvec1]], [[bvec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = icmp {{[osu]?}}lt <[[NUM]] x i1> [[bvec2]], [[bvec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres7:%[0-9]*]] = icmp {{[osu]]?}}gt <[[NUM]] x i1> [[bvec3]], [[bvec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres8:%[0-9]*]] = icmp {{[osu]]?}}le <[[NUM]] x i1> [[bvec4]], [[bvec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres9:%[0-9]*]] = icmp {{[osu]?}}ge <[[NUM]] x i1> [[bvec5]], [[bvec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: 
[[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i, bool val)[10] { + vector res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x i32>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> zeroinitializer, <[[NUM]] x i32>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[res2]] + res[Ix] = true; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bthg0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg0]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg0]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg0]], <[[NUM]] x i32>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 %i + // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[addi]] + // CHECK: [[bthgi:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thgi]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%[0-9]*]] = zext <[[NUM]] x i1> [[bthgi]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thgi]], <[[NUM]] x i32>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bthg2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg2]], zeroinitializer + // CHECK: 
[[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg2]], <[[NUM]] x i32>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; + +} + +// Test bit twiddling operators. +// CHECK-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[10]) { + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] = things[2] | things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec4]], [[bvec3]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] = things[3] & things[4]; + + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> 
[[vec6]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = or <[[NUM]] x i1> [[bvec6]], [[bvec4]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] |= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[bvec7:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec7]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = and <[[NUM]] x i1> [[bvec7]], [[bvec5]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + things[5] &= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[bvec8:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec8]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = xor <[[NUM]] x i1> [[bvec6]], [[bvec8]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] ^= things[8]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl new file mode 100644 index 0000000000..b749a3b255 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl @@ -0,0 +1,73 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=uint -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,UNSIG +// RUN: %dxc -T lib_6_9 -DTYPE=int64_t -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,SIG +// RUN: %dxc -T lib_6_9 -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,UNSIG + +// Test bitwise operators on an assortment vector sizes and integer types with 6.9 native vectors. + +// Test bit twiddling operators. 
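+
+// Note on the shift checks further below: the 'and' of the shift amount that
+// precedes each shl/lshr/ashr reflects the shift count being masked to the
+// element bit width (e.g. a mask of 31 for 32-bit elements), which is why the
+// checks expect an 'and' before every shift. A minimal sketch, kept in
+// comments so it is not compiled as part of this test; the names and the uint
+// element type are illustrative assumptions only:
+//   vector<uint, 5> v, amt;
+//   vector<uint, 5> l = v << amt;  // expected IR: 'and' on amt, then shl
+//   vector<uint, 5> r = v >> amt;  // expected IR: 'and' on amt, then lshr (ashr if signed)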
+// CHECK-LABEL: define void @"\01?bittwiddlers +// CHECK-SAME: ([11 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]* +export void bittwiddlers(inout vector things[11]) { + // CHECK: [[adr1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1, + // CHECK: [[adr0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + + // CHECK: [[adr3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]], <[[TYPE]] + // CHECK: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + // 
CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl new file mode 100644 index 0000000000..8b12b96c80 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl @@ -0,0 +1,342 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +struct Interface { + TYPE assigned[10]; + TYPE arithmeticked[11]; + bool logicked[10]; + TYPE indexed[10]; + TYPE scales[10]; +}; + +#if 0 +// Requires vector loading support. Enable when available. +RWStructuredBuffer Input; +RWStructuredBuffer Output; + +TYPE g_val; + +[shader("compute")] +[numthreads(8,1,1)] +void main(uint GI : SV_GroupIndex) { + assignments(Output[GI].assigned, Input[GI].scales); + Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); + Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); + Output[GI].indexed = index(Input[GI].indexed, GI, g_val); +} +#endif + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{(8|4|2)}}) + // CHECK: [[res0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val1]], [[val5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast| nsw)?]] [[TYPE]] [[val2]], [[val6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast| nsw)?]] [[TYPE]] [[val3]], [[val7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast| nsw)?]] [[TYPE]] [[val4]], [[val8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] +#ifdef DBL + // DBL: [[fvec9:%.*]] = fptrunc double [[val9]] to float + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // DBL: [[fres5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] float [[fvec5]], [[fvec9]] + // DBL: [[res5:%.*]] = fpext float [[fres5]] to double + float f9 = things[9]; + float f5 = things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res1:%.*]] = [[SUB]] [[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000)}}, [[res0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] +#ifdef DBL + // DBL: [[fvec6:%.*]] = fptrunc double [[val6]] to float + // DBL: [[fres6:%.*]] = [[REM]] float [[fvec5]], [[fvec6]] + // DBL: [[res6:%.*]] = fpext float [[fres6]] to double + res[6] = (float)things[5] % (float)things[6]; +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val7]], {{(1|1\.?0*e?\+?0*|0xH3C00)}} + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[val8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[val9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[val10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[val10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x 
[[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // CHECK: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export bool logic(bool truth[10], TYPE consequences[10])[10] {
+  bool res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]]
+  // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[bvec2:%.*]] = icmp ne i32 [[val2]], 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bvec3:%.*]] = icmp ne i32 [[val3]], 0
+  // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]]
+  // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[bvec4:%.*]] = icmp ne i32 [[val4]], 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[bvec5:%.*]] = icmp ne i32 [[val5]], 0
+  // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]]
+  // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast| nsw)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]]
+  // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]]
+  // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]]
+  // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32
+  res[6] = consequences[2] < consequences[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[val3]], [[val4]]
+  // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32
+  res[7] = consequences[3] > consequences[4];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[val4]], [[val5]]
+  // 
CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]] + TYPE res[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 %i + // CHECK: store [[TYPE]] {{(1|1\.?0*e?\+?0*|0xH3C00)}}, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[TYPE]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* 
%things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl new file mode 100644 index 0000000000..cb2fd5f781 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl @@ -0,0 +1,57 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s --check-prefix=NOBR + +// Test that no short-circuiting takes place for logic ops with native vectors. +// First run verifies that side effects result in stores. +// Second runline just makes sure there are no branches nor phis at all. + +// NOBR-NOT: br i1 +// NOBR-NOT: = phi + +export int4 logic(inout bool4 truth[5], inout int4 consequences[4]) { + // CHECK: [[adr0:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[add:%.*]] = add <4 x i32> [[vec1]], + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[bres3:%.*]] = or <4 x i1> [[bvec1]], [[bvec0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext <4 x i1> [[bres3]] to <4 x i32> + // CHECK: store <4 x i32> [[res3]], <4 x i32>* [[adr3]] + truth[3] = truth[0] || consequences[1]++; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec0]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + // CHECK: [[bres4:%.*]] = and <4 x i1> [[bvec0]], [[bvec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <4 x i1> [[bres4]] to <4 x i32> + // CHECK: store <4 x i32> [[res4]], <4 x i32>* [[adr4]] + truth[4] = truth[1] && consequences[0]--; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[bcond:%.*]] = icmp ne <4 x i32> [[vec2]], zeroinitializer + // CHECK: [[adr2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[add:%.*]] = add <4 x i32> %25, + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* 
%consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <4 x i32>, <4 x i32>* [[adr3]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec3]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr3]] + // CHECK: [[res:%.*]] = select <4 x i1> [[bcond]], <4 x i32> [[vec2]], <4 x i32> [[vec3]] + int4 res = truth[2] ? consequences[2]++ : consequences[3]--; + + // CHECK: ret <4 x i32> %30 + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl new file mode 100644 index 0000000000..c366261406 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -0,0 +1,479 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] +// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +struct Interface { + TYPE assigned[10]; + TYPE arithmeticked[11]; + bool logicked[10]; + TYPE indexed[10]; + TYPE scales[10]; +}; + +#if 0 +// Requires vector loading support. Enable when available. +RWStructuredBuffer Input; +RWStructuredBuffer Output; + +TYPE g_val; + +[shader("compute")] +[numthreads(8,1,1)] +void main(uint GI : SV_GroupIndex) { + assignments(Output[GI].assigned, Input[GI].scales); + Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); + Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); + Output[GI].indexed = index(Input[GI].indexed, GI, g_val); +} +#endif + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
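+
+// Note: the vector<T, 1> cases below still lower to one-element LLVM vectors,
+// so unlike the plain scalar tests the checks expect an extractelement before
+// each scalar op and an insertelement to rebuild the <1 x T> value before it
+// is stored. A minimal sketch, kept in comments so it is not compiled as part
+// of this test; the float1 element type is just an illustrative choice:
+//   float1 a, b;       // float1 is shorthand for vector<float, 1>
+//   float1 c = a + b;  // expected IR: extractelement x2, fadd, insertelement, store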
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[res0:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[val0]], i64 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[add1:%.*]] = [[ADD:f?add( fast)?]] [[ELTY]] [[val1]], [[val5]] + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add1]], i32 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[sub2:%.*]] = [[SUB:f?sub( fast)?]] [[ELTY]] [[val2]], [[val6]] + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub2]], i32 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[mul3:%.*]] = [[MUL:f?mul( fast)?]] [[ELTY]] [[val3]], [[val7]] + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul3]], i32 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[div4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[ELTY]] [[val4]], [[val8]] + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div4]], i32 0 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + 
// NODBL: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]] + // NODBL: [[rem5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[ELTY]] [[val5]], [[val9]] + // NODBL: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem5]], i32 0 + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 + // CHECK: [[sub1:%.*]] = [[SUB]] [[ELTY]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[add2:%.*]] = [[ADD]] [[ELTY]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[sub3:%.*]] = [[SUB]] [[ELTY]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[mul4:%.*]] = [[MUL]] [[ELTY]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[div5:%.*]] = [[DIV]] [[ELTY]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]] + // NODBL: [[rem6:%.*]] = [[REM]] [[ELTY]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[add7:%.*]] = [[ADD]] [[ELTY]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add7]], i32 0 + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[add8:%.*]] = [[ADD]] [[ELTY]] 
[[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + // CHECK: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add8]], i32 0 + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // CHECK: [[add9:%.*]] = [[ADD]] [[ELTY]] [[val9]], [[POS1]] + // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i32 0 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[val10:%.*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // CHECK: [[add10:%.*]] = [[ADD]] [[ELTY]] [[val10]], [[NEG1]] + // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i32 0 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub1]], i64 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add2]], i64 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub3]], i64 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul4]], i64 0 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div5]], i64 0 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // NODBL: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem6]], i64 0 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[ld7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. 
+  // CHECK: store [[TYPE]] [[ld8]], [[TYPE]]* [[adr8]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9
+  // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i64 0
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10
+  // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i64 0
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  // CHECK: ret void
+  return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export bool logic(bool truth[10], TYPE consequences[10])[10] {
+  bool res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]]
+  // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[bval2:%.*]] = icmp ne i32 [[val2]], 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bval3:%.*]] = icmp ne i32 [[val3]], 0
+  // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]]
+  // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[bval4:%.*]] = icmp ne i32 [[val4]], 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[bval5:%.*]] = icmp ne i32 [[val5]], 0
+  // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]]
+  // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  res[3] = truth[3] ?
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[ld0]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[ELTY]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[ELTY]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[ELTY]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[ELTY]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[ELTY]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[ELTY]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x 
i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] + TYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 + // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 %i + // CHECK: store [[ELTY]] [[POS1]], [[ELTY]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 2 + // CHECK: store [[ELTY]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[ELTY]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%.*]] = extractelement [[TYPE]] [[ld0]], i64 0 + // CHECK: store [[ELTY]] [[thg0]], [[ELTY]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[ldi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%.*]] = extractelement [[TYPE]] [[ldi]], i64 0 + // CHECK: store [[ELTY]] [[thgi]], [[ELTY]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%.*]] = extractelement [[TYPE]] [[ld2]], i64 0 + // CHECK: store [[ELTY]] [[thg2]], [[ELTY]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
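+// Note (illustrative sketch, not itself a checked requirement): the shift cases below expect an
+// `and` on the shift amount before the shl/ashr/lshr, i.e. the count is masked, roughly:
+//   things[4] = things[5] << (things[6] & (BITS - 1));
+// where BITS stands in for the element bit width of TYPE.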
+// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout TYPE things[13]) { + // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 + // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // INT: [[xor1:%[0-9]*]] = xor [[ELTY]] [[val1]], -1 + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor1]], i32 0 + // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr0]] + things[0] = ~things[1]; + + // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 2 + // INT: [[ld2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // INT: [[val2:%[0-9]*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 3 + // INT: [[ld3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // INT: [[val3:%[0-9]*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // INT: [[or1:%[0-9]*]] = or [[ELTY]] [[val3]], [[val2]] + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or1]], i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] = things[2] | things[3]; + + // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 4 + // INT: [[ld4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // INT: [[val4:%[0-9]*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // INT: [[and2:%[0-9]*]] = and [[ELTY]] [[val4]], [[val3]] + // INT: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and2]], i32 0 + // INT: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] = things[3] & things[4]; + + // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 5 + // INT: [[ld5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // INT: [[val5:%[0-9]*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // INT: [[xor3:%[0-9]*]] = xor [[ELTY]] [[val5]], [[val4]] + // INT: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor3]], i32 0 + // INT: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] = things[4] ^ things[5]; + + // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 6 + // INT: [[ld6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // INT: [[val6:%[0-9]*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // INT: [[shv6:%[0-9]*]] = and [[ELTY]] [[val6]] + // INT: [[shl4:%[0-9]*]] = shl [[ELTY]] [[val5]], [[shv6]] + // INT: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl4]], i32 0 + // INT: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] = things[5] << things[6]; + + // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 7 + // INT: [[ld7:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // INT: [[val7:%[0-9]*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // INT: [[shv7:%[0-9]*]] = and [[ELTY]] [[val7]] + // UNSIG: [[shr5:%[0-9]*]] = lshr [[ELTY]] [[val6]], [[shv7]] + // SIG: [[shr5:%[0-9]*]] = ashr [[ELTY]] [[val6]], [[shv7]] + // INT: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr5]], i32 0 + // INT: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] = things[6] >> things[7]; + + // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 8 + // INT: 
[[ld8:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // INT: [[val8:%[0-9]*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // INT: [[or6:%[0-9]*]] = or [[ELTY]] [[val8]], [[val6]] + // INT: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or6]], i32 0 + // INT: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // INT: [[val9:%[0-9]*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // INT: [[and7:%[0-9]*]] = and [[ELTY]] [[val9]], [[val7]] + // INT: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and7]], i32 0 + // INT: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 10 + // INT: [[ld10:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // INT: [[val10:%[0-9]*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // INT: [[xor8:%[0-9]*]] = xor [[ELTY]] [[val10]], [[val8]] + // INT: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor8]], i32 0 + // INT: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr11]] + // INT: [[val11:%[0-9]*]] = extractelement [[TYPE]] [[ld11]], i32 0 + // INT: [[shv11:%[0-9]*]] = and [[ELTY]] [[val11]] + // INT: [[shl9:%[0-9]*]] = shl [[ELTY]] [[val9]], [[shv11]] + // INT: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl9]], i32 0 + // INT: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr12]] + // INT: [[val12:%[0-9]*]] = extractelement [[TYPE]] [[ld12]], i32 0 + // INT: [[shv12:%[0-9]*]] = and [[ELTY]] [[val12]] + // UNSIG: [[shr10:%[0-9]*]] = lshr [[ELTY]] [[val10]], [[shv12]] + // SIG: [[shr10:%[0-9]*]] = ashr [[ELTY]] [[val10]], [[shv12]] + // INT: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr10]], i32 0 + // INT: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl new file mode 100644 index 0000000000..ed7a2bff25 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -0,0 +1,581 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=8 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 
2018 -T lib_6_9 -DTYPE=float -DNUM=10 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=11 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=12 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=13 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=14 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=15 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=16 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=18 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=128 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Less exhaustive testing for some other types.
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=177 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
+
+// Test relevant operators on an assortment of vector sizes and types with 6.9 native vectors.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// Uses a non-vector buffer to avoid interacting with that implementation.
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]]
+
+RWStructuredBuffer< TYPE > buf;
+
+export void assignments(inout vector<TYPE, NUM> things[10], TYPE scales[10]);
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11];
+export vector<TYPE, NUM> scarithmetic(inout vector<TYPE, NUM> things[10], TYPE scales[10])[10];
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10];
+export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i, TYPE val)[10];
+
+struct Interface {
+  vector<TYPE, NUM> assigned[10];
+  vector<TYPE, NUM> arithmeticked[11];
+  vector<TYPE, NUM> scarithmeticked[10];
+  vector<bool, NUM> logicked[10];
+  vector<TYPE, NUM> indexed[10];
+  TYPE scales[10];
+};
+
+#if 0
+// Requires vector loading support. Enable when available.
+RWStructuredBuffer<Interface> Input;
+RWStructuredBuffer<Interface> Output;
+
+TYPE g_val;
+
+[shader("compute")]
+[numthreads(8,1,1)]
+void main(uint GI : SV_GroupIndex) {
+  assignments(Output[GI].assigned, Input[GI].scales);
+  Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked);
+  Output[GI].scarithmeticked = scarithmetic(Input[GI].scarithmeticked, Input[GI].scales);
+  Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned);
+  Output[GI].indexed = index(Input[GI].indexed, GI, g_val);
+}
+#endif
+
+// A mixed-type overload to test overload resolution and mingle different vector element types in ops
+// Test assignment operators.
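+// Note (illustrative): with -DTYPE=float -DNUM=6, the captures above and in assignments() below
+// bind roughly
+//   [[TY]] = f32, [[TYPE]] = float, [[NUM]] = 6
+// so a later `store <[[NUM]] x [[TYPE]]>` pattern matches `store <6 x float>` without
+// hard-coding the width per RUN line.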
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout vector<TYPE, NUM> things[10], TYPE scales[10]) {
+
+  // Another trick to capture the size.
+  // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]]
+  // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.[[TY]] [[res]], 0
+  TYPE scalar = buf.Load(NUM);
+
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl]], i32 0
+  // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]]
+  things[0] = scalar;
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  things[1] += things[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  things[2] -= things[6];
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  things[3] *= things[7];
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
+  things[4] /= things[8];
+
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]]
+#ifdef DBL
+  // DBL can't use remainder operator, do something anyway to keep the rest consistent.
+  // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float>
+  // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float>
+  // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]]
+  // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double>
+  vector<float, NUM> f9 = things[9];
+  vector<float, NUM> f5 = things[5];
+  f5 %= f9;
+  things[5] = f5;
+#else
+  // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]]
+  things[5] %= things[9];
+#endif
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1
+  // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0
+  // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
+  things[6] += scales[1];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2
+  // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0
+  // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]]
+  things[7] -= scales[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3
+  // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0
+  // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]]
+  things[8] *= scales[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4
+  // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0
+  // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]]
+  things[9] /= scales[4];
+
+}
+
+// Test arithmetic operators.
+// CHECK-LABEL: define void @"\01?arithmetic
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] {
+  vector<TYPE, NUM> res[11];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[res1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]>
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  res[0] = -things[0];
+  res[1] = +things[0];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]]
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]]
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  res[5] = things[4] / things[5];
+
+  // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float>
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+#ifdef DBL
+  // DBL can't use remainder operator, do something anyway to keep the rest consistent.
+  // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float>
+  // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]]
+  // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double>
+  res[6] = (vector<float, NUM>)things[5] % (vector<float, NUM>)things[6];
+#else
+  // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  res[6] = things[5] % things[6];
+#endif
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]]
+  res[7] = things[7]++;
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]]
+  res[8] = things[8]--;
+
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]]
+  // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]]
+  res[9] = ++things[9];
+
+  // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+  // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add10]]
+  // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]]
+  res[10] = --things[10];
+
+  // Stores into res[]. Previous were for things[] inout.
+ // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // These two were post ops, so the original value goes into res[]. + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 10 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
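+// Note (illustrative sketch, not itself checked): mixing a scalar with a vector splats the scalar
+// to the vector width first, which is what the insertelement/shufflevector pairs below verify.
+// In HLSL terms, roughly:
+//   vector<TYPE, NUM> splat = scales[0]; // broadcast one scalar to all NUM lanes
+//   res[0] = things[0] + splat;          // then a plain elementwise vector add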
+// CHECK-LABEL: define void @"\01?scarithmetic
+export vector<TYPE, NUM> scarithmetic(inout vector<TYPE, NUM> things[10], TYPE scales[10])[10] {
+  vector<TYPE, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 0
+  // CHECK: [[scl0:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add0]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0
+  // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]]
+  res[0] = things[0] + scales[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1
+  // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0
+  // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]]
+  res[1] = things[1] - scales[1];
+
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2
+  // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0
+  // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]]
+  res[2] = things[2] * scales[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3
+  // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0
+  // CHECK:
[[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. 
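+// Note (illustrative, not itself checked): bool vectors are stored as <N x i32>, so the checks
+// below look for the usual lowering pattern rather than direct i1 storage, roughly:
+//   %b = icmp ne <N x i32> %v, zeroinitializer   ; i32 memory form -> i1 bits
+//   ... logical op on <N x i1> ...
+//   %r = zext <N x i1> %b to <N x i32>           ; back to the i32 memory form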
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32>
+  res[0] = !truth[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]]
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]]
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]]
+  // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32>
+  res[3] = truth[3] ?
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // 
CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3
+  // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6
+  // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]]
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7
+  // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]]
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8
+  // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]]
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9
+  // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]]
+  // CHECK: ret void
+
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators
+// CHECK-LABEL: define void @"\01?index
+export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i, TYPE val)[10] {
+  vector<TYPE, NUM> res[10];
+
+  // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x [[TYPE]]>]
+  // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]]
+  res[0] = 0;
+
+  // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 %i
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|0xH3C00).*}}>, <[[NUM]] x [[TYPE]]>* [[resi]]
+  res[i] = 1;
+
+  // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|0xH4000).*}}>, <[[NUM]] x [[TYPE]]>* [[res2]]
+  res[Ix] = 2;
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]]
+  res[3] = things[0];
+
+  // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 %i
+  // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[addi]]
+  // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]]
+  res[4] = things[i];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+}
+
+#ifdef INT
+// Test bit twiddling operators.
+// INT-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout vector<TYPE, NUM> things[13]) {
+  // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // INT: [[ld1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+  // INT: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld1]], <[[TYPE]] -1
+  // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]]
+  things[0] = ~things[1];
+
+  // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // INT: [[ld2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // INT: [[ld3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld3]], [[ld2]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]]
+  things[1] = things[2] | things[3];
+
+  // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // INT: [[ld4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld4]], [[ld3]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+  things[2] = things[3] & things[4];
+
+  // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // INT: [[ld5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld4]], [[ld5]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+  things[3] = things[4] ^ things[5];
+
+  // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // INT: [[ld6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+  // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld6]]
+  // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld5]], [[shv6]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+  things[4] = things[5] << things[6];
+
+  // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // INT: [[ld7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+  // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld7]]
+  // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]]
+  // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+  things[5] = things[6] >> things[7];
+
+  // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // INT:
[[ld8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld8]], [[ld6]] + // INT: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld9]], [[ld7]] + // INT: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // INT: [[ld10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld8]], [[ld10]] + // INT: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr11]] + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld9]], [[shv11]] + // INT: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr12]] + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // INT: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll new file mode 100644 index 0000000000..987f997a2a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll @@ -0,0 +1,269 @@ +; RUN: %dxopt %s -dynamic-vector-to-array,ReplaceAllVectors=0 -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } + +; Vec2s should be preserved. +; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 + +; Dynamic Vec1s should be reduced. 
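+; For illustration (this sketch is not itself checked): dynamic-vector-to-array
+; rewrites a dynamically indexed <1 x float> global such as @dyglob1 into the
+; [1 x float] array @dyglob1.v, so a vector GEP like
+;   getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %ix
+; is expected to become the equivalent array GEP
+;   getelementptr [1 x float], [1 x float]* @dyglob1.v, i32 0, i32 %ix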
+; CHECK-DAG: @dygar1.v = internal global [2 x [1 x float]] zeroinitializer, align 4
+; CHECK-DAG: @dygrec1.0.v = internal global [1 x float] zeroinitializer, align 4
+; CHECK-DAG: @dyglob1.v = internal global [1 x float] zeroinitializer, align 4
+
+; These statically-accessed Vec1s were already reduced by SROA.
+; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4
+; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4
+; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4
+
+@dyglob1 = internal global <1 x float> zeroinitializer, align 4
+@dyglob2 = internal global <2 x float> zeroinitializer, align 4
+@stglob2 = internal global <2 x float> zeroinitializer, align 4
+@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@dygrec2.0 = internal global <2 x float> zeroinitializer, align 4
+@stgrec2.0 = internal global <2 x float> zeroinitializer, align 4
+@stgar1.0 = internal global [2 x float] zeroinitializer, align 4
+@dygrec1.0 = internal global <1 x float> zeroinitializer, align 4
+@stglob1.0 = internal global float 0.000000e+00, align 4
+@stgrec1.0.0 = internal global float 0.000000e+00, align 4
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define <4 x float> @"\01?tester
+define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 {
+bb:
+ ; Vec2s are preserved.
+ ; CHECK-DAG: %dyloc2 = alloca <2 x float>
+ ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>]
+ ; CHECK-DAG: %dylorc2.0 = alloca <2 x float>
+
+ ; CHECK-DAG: %stloc2 = alloca <2 x float>
+ ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>]
+ ; CHECK-DAG: %stlorc2.0 = alloca <2 x float>
+
+ ; Static vec1s are unaltered by dynamic vector to array.
+ ; CHECK-DAG: %stloc1 = alloca <1 x float>
+ ; CHECK-DAG: %stlar1.0 = alloca [3 x float]
+ ; CHECK-DAG: %stlorc1.0 = alloca <1 x float>
+
+ ; Dynamic vec1s are removed and lose their names.
+ ; CHECK-DAG: alloca [1 x float] + ; CHECK-DAG: alloca [3 x [1 x float]] + ; CHECK-DAG: alloca [1 x float] + + %dylorc1.0 = alloca <1 x float> + %stlorc1.0 = alloca <1 x float> + %dylorc2.0 = alloca <2 x float> + %stlorc2.0 = alloca <2 x float> + %stlar1.0 = alloca [3 x float] + %tmp = alloca i32, align 4 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + store i32 %ix, i32* %tmp, align 4 + + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb76 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float %tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* %tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp32 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30, i32 %tmp31 ; line:56 col:20 + store float %tmp29, float* %tmp32 ; line:56 col:35 + %tmp33 = getelementptr inbounds [3 x float], [3 x float]* %stlar1.0, i32 0, i32 1 ; line:56 col:5 + store float %tmp29, float* %tmp33 ; line:56 col:18 + %tmp34 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp35 = load float, float* %tmp34, align 4 ; line:57 col:37 + %tmp36 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp37 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp38 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp36, i32 %tmp37 ; line:57 col:20 + store float %tmp35, float* %tmp38 ; line:57 col:35 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1, i32 0 ; line:57 col:5 + store float %tmp35, float* %tmp39 ; line:57 col:18 + %tmp40 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp41 = load float, float* %tmp40, align 4 ; line:58 col:36 + %tmp42 = load i32, i32* %tmp, align 4 ; line:58 col:30 + %tmp43 = getelementptr inbounds <1 x float>, <1 x float>* %dylorc1.0, i32 0, i32 %tmp42 ; line:58 col:20 + store float %tmp41, float* %tmp43 
; line:58 col:34 + %tmp44 = getelementptr inbounds <1 x float>, <1 x float>* %stlorc1.0, i32 0, i32 0 ; line:58 col:5 + store float %tmp41, float* %tmp44 ; line:58 col:18 + %tmp45 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp46 = load float, float* %tmp45, align 4 ; line:59 col:36 + %tmp47 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp48 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp47 ; line:59 col:20 + store float %tmp46, float* %tmp48 ; line:59 col:34 + %tmp49 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:59 col:5 + store float %tmp46, float* %tmp49 ; line:59 col:18 + %tmp50 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp51 = load float, float* %tmp50, align 4 ; line:61 col:32 + %tmp52 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp53 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp52 ; line:61 col:18 + store float %tmp51, float* %tmp53 ; line:61 col:30 + store float %tmp51, float* @stglob1.0 ; line:61 col:16 + %tmp54 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp55 = load float, float* %tmp54, align 4 ; line:62 col:32 + %tmp56 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp57 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp56 ; line:62 col:18 + store float %tmp55, float* %tmp57 ; line:62 col:30 + store float %tmp55, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp59 = load float, float* %tmp58, align 4 ; line:63 col:37 + %tmp60 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp61 = load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp62 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp60, i32 %tmp61 ; line:63 col:20 + store float %tmp59, float* %tmp62 ; line:63 col:35 + store float %tmp59, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 1) ; line:63 col:18 + %tmp63 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp64 = load float, float* %tmp63, align 4 ; line:64 col:37 + %tmp65 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp66 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp67 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp65, i32 %tmp66 ; line:64 col:20 + store float %tmp64, float* %tmp67 ; line:64 col:35 + store float %tmp64, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp68 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp69 = load float, float* %tmp68, align 4 ; line:65 col:36 + %tmp70 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp71 = getelementptr inbounds <1 x float>, <1 x float>* @dygrec1.0, i32 0, i32 %tmp70 ; line:65 col:20 + store float %tmp69, float* %tmp71 ; line:65 col:34 + store float %tmp69, float* @stgrec1.0.0 ; line:65 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp73 = load float, float* %tmp72, align 4 ; line:66 col:36 + %tmp74 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp75 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp74 ; line:66 
col:20 + store float %tmp73, float* %tmp75 ; line:66 col:34 + store float %tmp73, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:66 col:18 + br label %bb76 ; line:67 col:3 + +bb76: ; preds = %bb17, %bb + %tmp77 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17 + %tmp78 = extractelement <1 x float> %tmp77, i32 0 ; line:68 col:17 + %tmp79 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp80 = extractelement <2 x float> %tmp79, i32 1 ; line:68 col:27 + %tmp81 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp82 = extractelement <1 x float> %tmp81, i32 0 ; line:68 col:37 + %tmp83 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp84 = extractelement <2 x float> %tmp83, i32 1 ; line:68 col:47 + %tmp85 = insertelement <4 x float> undef, float %tmp78, i64 0 ; line:68 col:16 + %tmp86 = insertelement <4 x float> %tmp85, float %tmp80, i64 1 ; line:68 col:16 + %tmp87 = insertelement <4 x float> %tmp86, float %tmp82, i64 2 ; line:68 col:16 + %tmp88 = insertelement <4 x float> %tmp87, float %tmp84, i64 3 ; line:68 col:16 + %tmp89 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp90 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp91 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp89, i32 %tmp90 ; line:68 col:66 + %tmp92 = load float, float* %tmp91 ; line:68 col:66 + %tmp93 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp94 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp95 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp93, i32 %tmp94 ; line:68 col:82 + %tmp96 = load float, float* %tmp95 ; line:68 col:82 + %tmp97 = getelementptr [3 x float], [3 x float]* %stlar1.0, i32 0, i32 0 ; line:68 col:98 + %load = load float, float* %tmp97 ; line:68 col:98 + %insert = insertelement <1 x float> undef, float %load, i64 0 ; line:68 col:98 + %tmp98 = extractelement <1 x float> %insert, i32 0 ; line:68 col:98 + %tmp99 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp100 = load <2 x float>, <2 x float>* %tmp99, align 4 ; line:68 col:111 + %tmp101 = extractelement <2 x float> %tmp100, i32 1 ; line:68 col:111 + %tmp102 = insertelement <4 x float> undef, float %tmp92, i64 0 ; line:68 col:65 + %tmp103 = insertelement <4 x float> %tmp102, float %tmp96, i64 1 ; line:68 col:65 + %tmp104 = insertelement <4 x float> %tmp103, float %tmp98, i64 2 ; line:68 col:65 + %tmp105 = insertelement <4 x float> %tmp104, float %tmp101, i64 3 ; line:68 col:65 + %tmp106 = fadd <4 x float> %tmp88, %tmp105 ; line:68 col:57 + %tmp107 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp108 = extractelement <1 x float> %tmp107, i32 0 ; line:69 col:10 + %tmp109 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp110 = extractelement <2 x float> %tmp109, i32 1 ; line:69 col:21 + %load3 = load float, float* @stglob1.0 ; line:69 col:32 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 ; line:69 col:32 + %tmp111 = extractelement <1 x float> %insert4, i32 0 ; line:69 col:32 + %tmp112 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp113 = extractelement <2 x float> %tmp112, i32 1 ; line:69 col:43 + %tmp114 = insertelement <4 x float> undef, float %tmp108, i64 0 ; line:69 col:9 + %tmp115 = insertelement <4 x float> %tmp114, float %tmp110, i64 1 ; line:69 col:9 + %tmp116 = insertelement <4 x 
float> %tmp115, float %tmp111, i64 2 ; line:69 col:9 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp113, i64 3 ; line:69 col:9 + %tmp118 = fadd <4 x float> %tmp106, %tmp117 ; line:68 col:124 + %tmp119 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp120 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp121 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp119, i32 %tmp120 ; line:69 col:63 + %tmp122 = load float, float* %tmp121 ; line:69 col:63 + %tmp123 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp124 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp125 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp123, i32 %tmp124 ; line:69 col:79 + %tmp126 = load float, float* %tmp125 ; line:69 col:79 + %load1 = load float, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 0) ; line:69 col:95 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 ; line:69 col:95 + %tmp127 = extractelement <1 x float> %insert2, i32 0 ; line:69 col:95 + %tmp128 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp129 = extractelement <2 x float> %tmp128, i32 1 ; line:69 col:108 + %tmp130 = insertelement <4 x float> undef, float %tmp122, i64 0 ; line:69 col:62 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp126, i64 1 ; line:69 col:62 + %tmp132 = insertelement <4 x float> %tmp131, float %tmp127, i64 2 ; line:69 col:62 + %tmp133 = insertelement <4 x float> %tmp132, float %tmp129, i64 3 ; line:69 col:62 + %tmp134 = fadd <4 x float> %tmp118, %tmp133 ; line:69 col:54 + %tmp135 = load <1 x float>, <1 x float>* %stlorc1.0, align 4 ; line:70 col:20 + %tmp136 = extractelement <1 x float> %tmp135, i64 0 ; line:70 col:11 + %tmp137 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:70 col:23 + %tmp138 = load float, float* %tmp137 ; line:70 col:23 + %tmp139 = load <1 x float>, <1 x float>* %dylorc1.0, align 4 ; line:70 col:45 + %tmp140 = extractelement <1 x float> %tmp139, i64 0 ; line:70 col:11 + %tmp141 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp142 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp141 ; line:70 col:48 + %tmp143 = load float, float* %tmp142 ; line:70 col:48 + %tmp144 = insertelement <4 x float> undef, float %tmp136, i64 0 ; line:70 col:11 + %tmp145 = insertelement <4 x float> %tmp144, float %tmp138, i64 1 ; line:70 col:11 + %tmp146 = insertelement <4 x float> %tmp145, float %tmp140, i64 2 ; line:70 col:11 + %tmp147 = insertelement <4 x float> %tmp146, float %tmp143, i64 3 ; line:70 col:11 + %tmp148 = fadd <4 x float> %tmp134, %tmp147 ; line:69 col:121 + %load5 = load float, float* @stgrec1.0.0 ; line:70 col:80 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 ; line:70 col:80 + %tmp149 = extractelement <1 x float> %insert6, i64 0 ; line:70 col:71 + %tmp150 = load float, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:70 col:83 + %tmp151 = load <1 x float>, <1 x float>* @dygrec1.0, align 4 ; line:70 col:105 + %tmp152 = extractelement <1 x float> %tmp151, i64 0 ; line:70 col:71 + %tmp153 = load i32, i32* %tmp, align 4 ; line:70 col:118 + %tmp154 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp153 ; line:70 col:108 + %tmp155 = load float, float* %tmp154 ; line:70 col:108 + %tmp156 = insertelement <4 x float> 
undef, float %tmp149, i64 0 ; line:70 col:71 + %tmp157 = insertelement <4 x float> %tmp156, float %tmp150, i64 1 ; line:70 col:71 + %tmp158 = insertelement <4 x float> %tmp157, float %tmp152, i64 2 ; line:70 col:71 + %tmp159 = insertelement <4 x float> %tmp158, float %tmp155, i64 3 ; line:70 col:71 + %tmp160 = fadd <4 x float> %tmp148, %tmp159 ; line:70 col:63 + ret <4 x float> %tmp160 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!dx.version = !{!3} +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll new file mode 100644 index 0000000000..95a64a17d4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll @@ -0,0 +1,324 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Test for SROA reduction of globals and allocas. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } +%ConstantBuffer = type opaque + +; Confirm that the dynamic globals are untouched and the statics are scalarized. +; DAG used to preserve the convenient ordering. + +; Dynamic access preserves even vec1s in SROA. +; CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec1.0 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Having >1 elements preserves even statically-accessed vec2s. +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Statically-accessed vec1s should get scalarized. +; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4 +; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +@dyglob2 = internal global <2 x float> zeroinitializer, align 4 +@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 +@dyglob1 = internal global <1 x float> zeroinitializer, align 4 +@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@stglob2 = internal global <2 x float> zeroinitializer, align 4 +@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +@stglob1 = internal global <1 x float> zeroinitializer, align 4 +@stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 { +bb: + ; Dynamic access preserves even vec1s in SROA. 
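+ ; (The element index is only known at runtime, so SROA keeps these as
+ ; <1 x float> vectors rather than reducing them to scalars; the companion
+ ; dynamic-vector-to-array test covers lowering them afterwards.)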
+ ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylar1 = alloca [3 x <1 x float>] + ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>] + + ; SROA doesn't reduce non-array allocas because scalarizer should get them. + ; CHECK-DAG: %stlorc1.0 = alloca <1 x float> + ; CHECK-DAG: %stlorc2.0 = alloca <2 x float> + ; CHECK-DAG: %stloc1 = alloca <1 x float>, align 4 + ; CHECK-DAG: %stloc2 = alloca <2 x float>, align 4 + + ; Statically-accessed arrays should get reduced. + ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %stlar1.0 = alloca [3 x float] + + %tmp = alloca i32, align 4, !dx.temp !14 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %dylorc1 = alloca %struct.VectRec1, align 4 + %dylorc2 = alloca %struct.VectRec2, align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar1 = alloca [3 x <1 x float>], align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + %stlorc1 = alloca %struct.VectRec1, align 4 + %stlorc2 = alloca %struct.VectRec2, align 4 + + store i32 %ix, i32* %tmp, align 4 + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb86 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float %tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* %tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30 ; line:56 col:20 + %tmp32 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp33 = getelementptr <1 x float>, <1 x float>* %tmp31, i32 0, i32 %tmp32 ; line:56 col:20 + store float %tmp29, float* %tmp33 ; line:56 col:35 + %tmp34 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 1 ; line:56 col:5 + %tmp35 = getelementptr <1 x float>, <1 x float>* %tmp34, i32 0, i32 0 ; line:56 col:5 + store float %tmp29, float* %tmp35 ; line:56 col:18 + %tmp36 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp37 = load float, 
float* %tmp36, align 4 ; line:57 col:37 + %tmp38 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp38 ; line:57 col:20 + %tmp40 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp41 = getelementptr <2 x float>, <2 x float>* %tmp39, i32 0, i32 %tmp40 ; line:57 col:20 + store float %tmp37, float* %tmp41 ; line:57 col:35 + %tmp42 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1 ; line:57 col:5 + %tmp43 = getelementptr <2 x float>, <2 x float>* %tmp42, i32 0, i32 0 ; line:57 col:5 + store float %tmp37, float* %tmp43 ; line:57 col:18 + %tmp44 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp45 = load float, float* %tmp44, align 4 ; line:58 col:36 + %tmp46 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:58 col:28 + %tmp47 = load i32, i32* %tmp, align 4 ; line:58 col:30 + %tmp48 = getelementptr <1 x float>, <1 x float>* %tmp46, i32 0, i32 %tmp47 ; line:58 col:20 + store float %tmp45, float* %tmp48 ; line:58 col:34 + %tmp49 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:58 col:13 + %tmp50 = getelementptr <1 x float>, <1 x float>* %tmp49, i32 0, i32 0 ; line:58 col:5 + store float %tmp45, float* %tmp50 ; line:58 col:18 + %tmp51 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp52 = load float, float* %tmp51, align 4 ; line:59 col:36 + %tmp53 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:59 col:28 + %tmp54 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp55 = getelementptr <2 x float>, <2 x float>* %tmp53, i32 0, i32 %tmp54 ; line:59 col:20 + store float %tmp52, float* %tmp55 ; line:59 col:34 + %tmp56 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:59 col:13 + %tmp57 = getelementptr <2 x float>, <2 x float>* %tmp56, i32 0, i32 1 ; line:59 col:5 + store float %tmp52, float* %tmp57 ; line:59 col:18 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp59 = load float, float* %tmp58, align 4 ; line:61 col:32 + %tmp60 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp61 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp60 ; line:61 col:18 + store float %tmp59, float* %tmp61 ; line:61 col:30 + store float %tmp59, float* getelementptr inbounds (<1 x float>, <1 x float>* @stglob1, i32 0, i32 0) ; line:61 col:16 + %tmp62 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp63 = load float, float* %tmp62, align 4 ; line:62 col:32 + %tmp64 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp65 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp64 ; line:62 col:18 + store float %tmp63, float* %tmp65 ; line:62 col:30 + store float %tmp63, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp66 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp67 = load float, float* %tmp66, align 4 ; line:63 col:37 + %tmp68 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp69 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp68 ; line:63 col:20 + %tmp70 = load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp71 = getelementptr <1 x float>, <1 x float>* %tmp69, i32 
0, i32 %tmp70 ; line:63 col:20 + store float %tmp67, float* %tmp71 ; line:63 col:35 + store float %tmp67, float* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 1, i32 0) ; line:63 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp73 = load float, float* %tmp72, align 4 ; line:64 col:37 + %tmp74 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp75 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp74 ; line:64 col:20 + %tmp76 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp77 = getelementptr <2 x float>, <2 x float>* %tmp75, i32 0, i32 %tmp76 ; line:64 col:20 + store float %tmp73, float* %tmp77 ; line:64 col:35 + store float %tmp73, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp78 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp79 = load float, float* %tmp78, align 4 ; line:65 col:36 + %tmp80 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp81 = getelementptr <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), i32 0, i32 %tmp80 ; line:65 col:20 + store float %tmp79, float* %tmp81 ; line:65 col:34 + store float %tmp79, float* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0, i32 0) ; line:65 col:18 + %tmp82 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp83 = load float, float* %tmp82, align 4 ; line:66 col:36 + %tmp84 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp85 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp84 ; line:66 col:20 + store float %tmp83, float* %tmp85 ; line:66 col:34 + store float %tmp83, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:66 col:18 + br label %bb86 ; line:67 col:3 + +bb86: ; preds = %bb17, %bb + %tmp87 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17 + %tmp88 = extractelement <1 x float> %tmp87, i32 0 ; line:68 col:17 + %tmp89 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp90 = extractelement <2 x float> %tmp89, i32 1 ; line:68 col:27 + %tmp91 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp92 = extractelement <1 x float> %tmp91, i32 0 ; line:68 col:37 + %tmp93 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp94 = extractelement <2 x float> %tmp93, i32 1 ; line:68 col:47 + %tmp95 = insertelement <4 x float> undef, float %tmp88, i64 0 ; line:68 col:16 + %tmp96 = insertelement <4 x float> %tmp95, float %tmp90, i64 1 ; line:68 col:16 + %tmp97 = insertelement <4 x float> %tmp96, float %tmp92, i64 2 ; line:68 col:16 + %tmp98 = insertelement <4 x float> %tmp97, float %tmp94, i64 3 ; line:68 col:16 + %tmp99 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp100 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp99 ; line:68 col:66 + %tmp101 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp102 = getelementptr <1 x float>, <1 x float>* %tmp100, i32 0, i32 %tmp101 ; line:68 col:66 + %tmp103 = load float, float* %tmp102 ; line:68 col:66 + %tmp104 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp105 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* 
%dylar2, i32 0, i32 %tmp104 ; line:68 col:82 + %tmp106 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp107 = getelementptr <2 x float>, <2 x float>* %tmp105, i32 0, i32 %tmp106 ; line:68 col:82 + %tmp108 = load float, float* %tmp107 ; line:68 col:82 + %tmp109 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 0 ; line:68 col:98 + %tmp110 = load <1 x float>, <1 x float>* %tmp109, align 4 ; line:68 col:98 + %tmp111 = extractelement <1 x float> %tmp110, i32 0 ; line:68 col:98 + %tmp112 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp113 = load <2 x float>, <2 x float>* %tmp112, align 4 ; line:68 col:111 + %tmp114 = extractelement <2 x float> %tmp113, i32 1 ; line:68 col:111 + %tmp115 = insertelement <4 x float> undef, float %tmp103, i64 0 ; line:68 col:65 + %tmp116 = insertelement <4 x float> %tmp115, float %tmp108, i64 1 ; line:68 col:65 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp111, i64 2 ; line:68 col:65 + %tmp118 = insertelement <4 x float> %tmp117, float %tmp114, i64 3 ; line:68 col:65 + %tmp119 = fadd <4 x float> %tmp98, %tmp118 ; line:68 col:57 + %tmp120 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp121 = extractelement <1 x float> %tmp120, i32 0 ; line:69 col:10 + %tmp122 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp123 = extractelement <2 x float> %tmp122, i32 1 ; line:69 col:21 + %tmp124 = load <1 x float>, <1 x float>* @stglob1, align 4 ; line:69 col:32 + %tmp125 = extractelement <1 x float> %tmp124, i32 0 ; line:69 col:32 + %tmp126 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp127 = extractelement <2 x float> %tmp126, i32 1 ; line:69 col:43 + %tmp128 = insertelement <4 x float> undef, float %tmp121, i64 0 ; line:69 col:9 + %tmp129 = insertelement <4 x float> %tmp128, float %tmp123, i64 1 ; line:69 col:9 + %tmp130 = insertelement <4 x float> %tmp129, float %tmp125, i64 2 ; line:69 col:9 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp127, i64 3 ; line:69 col:9 + %tmp132 = fadd <4 x float> %tmp119, %tmp131 ; line:68 col:124 + %tmp133 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp134 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp133 ; line:69 col:63 + %tmp135 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp136 = getelementptr <1 x float>, <1 x float>* %tmp134, i32 0, i32 %tmp135 ; line:69 col:63 + %tmp137 = load float, float* %tmp136 ; line:69 col:63 + %tmp138 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp139 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp138 ; line:69 col:79 + %tmp140 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp141 = getelementptr <2 x float>, <2 x float>* %tmp139, i32 0, i32 %tmp140 ; line:69 col:79 + %tmp142 = load float, float* %tmp141 ; line:69 col:79 + %tmp143 = load <1 x float>, <1 x float>* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 0), align 4 ; line:69 col:95 + %tmp144 = extractelement <1 x float> %tmp143, i32 0 ; line:69 col:95 + %tmp145 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp146 = extractelement <2 x float> %tmp145, i32 1 ; line:69 col:108 + %tmp147 = insertelement <4 x float> undef, float %tmp137, i64 0 ; line:69 col:62 + %tmp148 = insertelement <4 x float> %tmp147, float %tmp142, i64 1 ; 
line:69 col:62 + %tmp149 = insertelement <4 x float> %tmp148, float %tmp144, i64 2 ; line:69 col:62 + %tmp150 = insertelement <4 x float> %tmp149, float %tmp146, i64 3 ; line:69 col:62 + %tmp151 = fadd <4 x float> %tmp132, %tmp150 ; line:69 col:54 + %tmp152 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:70 col:20 + %tmp153 = load <1 x float>, <1 x float>* %tmp152, align 4 ; line:70 col:20 + %tmp154 = extractelement <1 x float> %tmp153, i64 0 ; line:70 col:11 + %tmp155 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:70 col:31 + %tmp156 = getelementptr <2 x float>, <2 x float>* %tmp155, i32 0, i32 1 ; line:70 col:23 + %tmp157 = load float, float* %tmp156 ; line:70 col:23 + %tmp158 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:70 col:45 + %tmp159 = load <1 x float>, <1 x float>* %tmp158, align 4 ; line:70 col:45 + %tmp160 = extractelement <1 x float> %tmp159, i64 0 ; line:70 col:11 + %tmp161 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:70 col:56 + %tmp162 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp163 = getelementptr <2 x float>, <2 x float>* %tmp161, i32 0, i32 %tmp162 ; line:70 col:48 + %tmp164 = load float, float* %tmp163 ; line:70 col:48 + %tmp165 = insertelement <4 x float> undef, float %tmp154, i64 0 ; line:70 col:11 + %tmp166 = insertelement <4 x float> %tmp165, float %tmp157, i64 1 ; line:70 col:11 + %tmp167 = insertelement <4 x float> %tmp166, float %tmp160, i64 2 ; line:70 col:11 + %tmp168 = insertelement <4 x float> %tmp167, float %tmp164, i64 3 ; line:70 col:11 + %tmp169 = fadd <4 x float> %tmp151, %tmp168 ; line:69 col:121 + %tmp170 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0), align 4 ; line:70 col:80 + %tmp171 = extractelement <1 x float> %tmp170, i64 0 ; line:70 col:71 + %tmp172 = load float, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:70 col:83 + %tmp173 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), align 4 ; line:70 col:105 + %tmp174 = extractelement <1 x float> %tmp173, i64 0 ; line:70 col:71 + %tmp175 = load i32, i32* %tmp, align 4 ; line:70 col:118 + %tmp176 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp175 ; line:70 col:108 + %tmp177 = load float, float* %tmp176 ; line:70 col:108 + %tmp178 = insertelement <4 x float> undef, float %tmp171, i64 0 ; line:70 col:71 + %tmp179 = insertelement <4 x float> %tmp178, float %tmp172, i64 1 ; line:70 col:71 + %tmp180 = insertelement <4 x float> %tmp179, float %tmp174, i64 2 ; line:70 col:71 + %tmp181 = insertelement <4 x float> %tmp180, float %tmp177, i64 3 ; line:70 col:71 + %tmp182 = fadd <4 x float> %tmp169, %tmp181 ; line:70 col:63 + ret <4 x float> %tmp182 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !10} +!dx.entryPoints = !{!19} +!dx.fnprops = !{} +!dx.options = !{!23, !24} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC1", i32 7, i32 9, i32 
13, i32 1} +!8 = !{i32 8, !9} +!9 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC2", i32 7, i32 9, i32 13, i32 2} +!10 = !{i32 1, <4 x float> (i32, [12 x float]*)* @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z", !11} +!11 = !{!12, !15, !17} +!12 = !{i32 1, !13, !14} +!13 = !{i32 7, i32 9, i32 13, i32 4} +!14 = !{} +!15 = !{i32 0, !16, !14} +!16 = !{i32 4, !"IX", i32 7, i32 4} +!17 = !{i32 0, !18, !14} +!18 = !{i32 4, !"VAL", i32 7, i32 9} +!19 = !{null, !"", null, !20, null} +!20 = !{null, null, !21, null} +!21 = !{!22} +!22 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!23 = !{i32 64} +!24 = !{i32 -1} +!25 = !{!26, !26, i64 0} +!26 = !{!"int", !27, i64 0} +!27 = !{!"omnipotent char", !28, i64 0} +!28 = !{!"Simple C/C++ TBAA"} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl new file mode 100644 index 0000000000..7641cb4f39 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T lib_6_9 %s | FileCheck %s + +// Mainly a source for the ScalarReductionOfAggregatesHLSL(SROA) +// and DynamicIndexingVectorToArray(DIVA) IR tests with native vectors +// using allocas, static globals, and parameters. +// Dynamically accessed 1-element vectors should get skipped by SROA, +// but addressed by DynamicIndexingVectorToArray (hence the name). +// Larger vectors should be untouched. +// Arrays of vectors get some special treatment as well. +// Verifies that the original code is as expected for the IR tests. + +struct VectRec1 { + float1 f : REC1; +}; +struct VectRec2 { + float2 f : REC2; +}; + +// Vec2s will be preserved. +// CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Dynamic vec1s will get replaced with dynamic vector to array. +// CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +// Vec2s will be preserved. +// CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Static vec1s will get replaced with SROA. +// CHECK-DAG: @stglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +static float1 dyglob1; +static float2 dyglob2; +static float1 dygar1[2]; +static float2 dygar2[3]; +static VectRec1 dygrec1; +static VectRec2 dygrec2; + +static float1 stglob1; +static float2 stglob2; +static float1 stgar1[2]; +static float2 stgar2[3]; +static VectRec1 stgrec1; +static VectRec2 stgrec2; + +// Test assignment operators. +// Vec2s should be skipped by SROA and DIVA +// DIVA will lower statically-indexed vectors and vectors in an array. +// CHECK-LABEL: define <4 x float> @"\01?tester +export float4 tester(int ix : IX, float vals[12] : VAL) { + + // Vec2s will be preserved. 
+ // CHECK-DAG: %dyloc2 = alloca <2 x float>, align 4
+ // CHECK-DAG: %dylar2 = alloca [4 x <2 x float>], align 4
+ // CHECK-DAG: %dylorc2 = alloca %struct.VectRec2, align 4
+
+ // Dynamic local vec1s will get replaced with dynamic vector to array.
+ // CHECK-DAG: %dyloc1 = alloca <1 x float>, align 4
+ // CHECK-DAG: %dylar1 = alloca [3 x <1 x float>], align 4
+ // CHECK-DAG: %dylorc1 = alloca %struct.VectRec1, align 4
+
+ // Vec2s will be preserved.
+ // CHECK-DAG: %stloc2 = alloca <2 x float>, align 4
+ // CHECK-DAG: %stlar2 = alloca [4 x <2 x float>], align 4
+ // CHECK-DAG: %stlorc2 = alloca %struct.VectRec2, align 4
+
+ // Static local vec1s will get replaced by various passes.
+ // CHECK-DAG: %stloc1 = alloca <1 x float>, align 4
+ // CHECK-DAG: %stlar1 = alloca [3 x <1 x float>], align 4
+ // CHECK-DAG: %stlorc1 = alloca %struct.VectRec1, align 4
+
+ float1 dyloc1;
+ float2 dyloc2;
+ float1 dylar1[3];
+ float2 dylar2[4];
+ VectRec1 dylorc1;
+ VectRec2 dylorc2;
+
+ float1 stloc1;
+ float2 stloc2;
+ float1 stlar1[3];
+ float2 stlar2[4];
+ VectRec1 stlorc1;
+ VectRec2 stlorc2;
+
+ if (ix > 0) {
+ stloc1[0] = dyloc1[ix] = vals[0];
+ stloc2[1] = dyloc2[ix] = vals[1];
+ stlar1[1][0] = dylar1[ix][ix] = vals[2];
+ stlar2[1][0] = dylar2[ix][ix] = vals[3];
+ stlorc1.f[0] = dylorc1.f[ix] = vals[4];
+ stlorc2.f[1] = dylorc2.f[ix] = vals[5];
+
+ stglob1[0] = dyglob1[ix] = vals[6];
+ stglob2[1] = dyglob2[ix] = vals[7];
+ stgar1[1][0] = dygar1[ix][ix] = vals[8];
+ stgar2[1][1] = dygar2[ix][ix] = vals[9];
+ stgrec1.f[0] = dygrec1.f[ix] = vals[10];
+ stgrec2.f[1] = dygrec2.f[ix] = vals[11];
+ }
+ return float4(dyloc1.x, dyloc2.y, stloc1.x, stloc2.y) + float4(dylar1[ix][ix], dylar2[ix][ix], stlar1[0].x, stlar2[0].y) +
+ float4(dyglob1.x, dyglob2.y, stglob1.x, stglob2.y) + float4(dygar1[ix][ix], dygar2[ix][ix], stgar1[0].x, stgar2[0].y) +
+ float4(stlorc1.f, stlorc2.f[1], dylorc1.f, dylorc2.f[ix]) + float4(stgrec1.f, stgrec2.f[1], dygrec1.f, dygrec2.f[ix]);
+}
+
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
new file mode 100644
index 0000000000..1fe7c17621
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
@@ -0,0 +1,660 @@
+; RUN: %dxopt %s -scalarizer -S | FileCheck %s
+
+; Vectors of length greater than 1 should get no changes from the scalarizer,
+; so this unusual test verifies that the pass makes no changes at all.
+; It is still justified because, prior to 6.9, many changes would result.
+; Compiled mostly for float7 vectors, with int7 for the integer-specific parts.
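+;
+; For context (illustrative only; nothing below checks this): before native
+; vectors, the scalarizer would have split a vector op such as
+;   %r = fadd fast <7 x float> %a, %b
+; into seven scalar fadds on lanes pulled out with extractelement and then
+; reassembled with insertelement. With SM 6.9 the <7 x float> operations below
+; are expected to survive untouched.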
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer" = type { float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } + +@"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" = external global [7 x %"class.RWStructuredBuffer"], align 4 +@llvm.used = appending global [1 x i8*] [i8* bitcast ([7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" to i8*)], section "llvm.metadata" + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?assignments +define void @"\01?assignments@@YAXY09$$CAV?$vector@M$06@@@Z"([10 x <7 x float>]* noalias %things) #0 { +bb: + %tmp = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* getelementptr inbounds ([7 x %"class.RWStructuredBuffer"], [7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A", i32 0, i32 0) + %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %tmp) + %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + + ; CHECK: [[buf:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 1, i32 0, i8 1, i32 4) + ; CHECK: [[val:%.*]] = extractvalue %dx.types.ResRet.f32 [[buf]], 0 + ; CHECK: [[vec:%.*]] = insertelement <7 x float> undef, float [[val]], i32 0 + ; CHECK: [[res0:%.*]] = shufflevector <7 x float> [[vec]], <7 x float> undef, <7 x i32> zeroinitializer + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: store <7 x float> [[res0]], <7 x float>* [[adr0]], align 4 + %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4) + %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0 + %tmp4 = insertelement <7 x float> undef, float %tmp3, i32 0 + %tmp5 = shufflevector <7 x float> %tmp4, <7 x float> undef, <7 x i32> zeroinitializer + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[res1:%.*]] = fadd fast <7 x float> [[ld1]], [[ld5]] + ; CHECK: store <7 x float> [[res1]], <7 x float>* [[adr1]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + %tmp10 = load <7 x float>, <7 x float>* %tmp9, align 4 + %tmp11 = fadd fast <7 x float> %tmp10, %tmp8 + store <7 x float> %tmp11, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 
+ ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fsub fast <7 x float> [[ld2]], [[ld6]] + ; CHECK: store <7 x float> [[res2]], <7 x float>* [[adr2]], align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + %tmp15 = load <7 x float>, <7 x float>* %tmp14, align 4 + %tmp16 = fsub fast <7 x float> %tmp15, %tmp13 + store <7 x float> %tmp16, <7 x float>* %tmp14, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fmul fast <7 x float> [[ld3]], [[ld7]] + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + %tmp20 = load <7 x float>, <7 x float>* %tmp19, align 4 + %tmp21 = fmul fast <7 x float> %tmp20, %tmp18 + store <7 x float> %tmp21, <7 x float>* %tmp19, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld8]] + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + %tmp25 = load <7 x float>, <7 x float>* %tmp24, align 4 + %tmp26 = fdiv fast <7 x float> %tmp25, %tmp23 + store <7 x float> %tmp26, <7 x float>* %tmp24, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = frem fast <7 x float> [[ld5]], [[ld9]] + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp27 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29, align 4 + %tmp31 = frem fast <7 x float> %tmp30, %tmp28 + store <7 x float> %tmp31, <7 x float>* %tmp29, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$06@@Y0L@$$CAV1@@Z"([11 x <7 x 
float>]* noalias sret %agg.result, [11 x <7 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[res0:%.*]] = fsub fast <7 x float> , [[ld0]] + %tmp = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp1 = load <7 x float>, <7 x float>* %tmp, align 4 + %tmp2 = fsub fast <7 x float> , %tmp1 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + %tmp3 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp4 = load <7 x float>, <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fadd fast <7 x float> [[ld1]], [[ld2]] + %tmp5 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + %tmp6 = load <7 x float>, <7 x float>* %tmp5, align 4 + %tmp7 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = fadd fast <7 x float> %tmp6, %tmp8 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fsub fast <7 x float> [[ld2]], [[ld3]] + %tmp10 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = fsub fast <7 x float> %tmp11, %tmp13 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fmul fast <7 x float> [[ld3]], [[ld4]] + %tmp15 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp16 = load <7 x float>, <7 x float>* %tmp15, align 4 + %tmp17 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = fmul fast <7 x float> %tmp16, %tmp18 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 
x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld5]] + %tmp20 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp21 = load <7 x float>, <7 x float>* %tmp20, align 4 + %tmp22 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = fdiv fast <7 x float> %tmp21, %tmp23 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = frem fast <7 x float> [[ld5]], [[ld6]] + %tmp25 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp26 = load <7 x float>, <7 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = frem fast <7 x float> %tmp26, %tmp28 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = fadd fast <7 x float> [[ld7]], + ; CHECK: store <7 x float> [[res7]], <7 x float>* [[adr7]], align 4 + %tmp30 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7 + %tmp31 = load <7 x float>, <7 x float>* %tmp30, align 4 + %tmp32 = fadd fast <7 x float> %tmp31, + store <7 x float> %tmp32, <7 x float>* %tmp30, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = fadd fast <7 x float> [[ld8]], + ; CHECK: store <7 x float> [[res8]], <7 x float>* [[adr8]], align 4 + %tmp33 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + %tmp34 = load <7 x float>, <7 x float>* %tmp33, align 4 + %tmp35 = fadd fast <7 x float> %tmp34, + store <7 x float> %tmp35, <7 x float>* %tmp33, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[res9:%.*]] = fadd fast <7 x float> [[ld9]], + ; CHECK: store <7 x float> [[res9]], <7 x float>* [[adr9]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + %tmp37 = load <7 x float>, <7 x float>* %tmp36, align 4 + %tmp38 = fadd fast <7 x float> %tmp37, + store <7 x float> %tmp38, <7 x float>* %tmp36, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x float>, <7 x float>* [[adr10]], align 4 + ; CHECK: [[res10:%.*]] = fadd fast <7 x float> [[ld10]], + ; CHECK: store <7 x float> [[res10]], <7 x float>* [[adr10]], align 4 + %tmp39 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + %tmp40 = load <7 x float>, <7 x float>* %tmp39, align 4 + %tmp41 = fadd fast <7 x float> %tmp40, + store <7 x float> %tmp41, <7 x float>* %tmp39, align 4 + + %tmp42 = getelementptr 
inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 0
+ store <7 x float> %tmp2, <7 x float>* %tmp42
+ %tmp43 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 1
+ store <7 x float> %tmp4, <7 x float>* %tmp43
+ %tmp44 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 2
+ store <7 x float> %tmp9, <7 x float>* %tmp44
+ %tmp45 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 3
+ store <7 x float> %tmp14, <7 x float>* %tmp45
+ %tmp46 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 4
+ store <7 x float> %tmp19, <7 x float>* %tmp46
+ %tmp47 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 5
+ store <7 x float> %tmp24, <7 x float>* %tmp47
+ %tmp48 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 6
+ store <7 x float> %tmp29, <7 x float>* %tmp48
+ %tmp49 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 7
+ store <7 x float> %tmp31, <7 x float>* %tmp49
+ %tmp50 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 8
+ store <7 x float> %tmp34, <7 x float>* %tmp50
+ %tmp51 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 9
+ store <7 x float> %tmp38, <7 x float>* %tmp51
+ %tmp52 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 10
+ store <7 x float> %tmp41, <7 x float>* %tmp52
+ ret void
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?logic
+define void @"\01?logic@@YA$$BY09V?$vector@_N$06@@Y09V1@Y09V?$vector@M$06@@@Z"([10 x <7 x i32>]* noalias sret %agg.result, [10 x <7 x i32>]* %truth, [10 x <7 x float>]* %consequences) #0 {
+bb:
+ ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0
+ ; CHECK: [[ld0:%.*]] = load <7 x i32>, <7 x i32>* [[adr0]], align 4
+ ; CHECK: [[nres0:%.*]] = icmp ne <7 x i32> [[ld0]], zeroinitializer
+ ; CHECK: [[bres0:%.*]] = icmp eq <7 x i1> [[nres0]], zeroinitializer
+ ; CHECK: [[res0:%.*]] = zext <7 x i1> [[bres0]] to <7 x i32>
+ %tmp = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0
+ %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4
+ %tmp2 = icmp ne <7 x i32> %tmp1, zeroinitializer
+ %tmp3 = icmp eq <7 x i1> %tmp2, zeroinitializer
+ %tmp4 = zext <7 x i1> %tmp3 to <7 x i32>
+
+ ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1
+ ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4
+ ; CHECK: [[bld1:%.*]] = icmp ne <7 x i32> [[ld1]], zeroinitializer
+ ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+ ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4
+ ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer
+ ; CHECK: [[bres1:%.*]] = or <7 x i1> [[bld1]], [[bld2]]
+ ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32>
+ %tmp5 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1
+ %tmp6 = load <7 x i32>, <7 x i32>* %tmp5, align 4
+ %tmp7 = icmp ne <7 x i32> %tmp6, zeroinitializer
+ %tmp8 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+ %tmp9 = load <7 x i32>, <7 x i32>* %tmp8, align 4
+ %tmp10 = icmp ne <7 x i32> %tmp9, zeroinitializer
+
%tmp11 = or <7 x i1> %tmp7, %tmp10 + %tmp12 = zext <7 x i1> %tmp11 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[bres2:%.*]] = and <7 x i1> [[bld2]], [[bld3]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp13 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + %tmp14 = load <7 x i32>, <7 x i32>* %tmp13, align 4 + %tmp15 = icmp ne <7 x i32> %tmp14, zeroinitializer + %tmp16 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = icmp ne <7 x i32> %tmp17, zeroinitializer + %tmp19 = and <7 x i1> %tmp15, %tmp18 + %tmp20 = zext <7 x i1> %tmp19 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[bld4:%.*]] = icmp ne <7 x i32> [[ld4]], zeroinitializer + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[bld5:%.*]] = icmp ne <7 x i32> [[ld5]], zeroinitializer + ; CHECK: [[bres3:%.*]] = select <7 x i1> [[bld3]], <7 x i1> [[bld4]], <7 x i1> [[bld5]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp21 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + %tmp22 = load <7 x i32>, <7 x i32>* %tmp21, align 4 + %tmp23 = icmp ne <7 x i32> %tmp22, zeroinitializer + %tmp24 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = icmp ne <7 x i32> %tmp25, zeroinitializer + %tmp27 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + %tmp28 = load <7 x i32>, <7 x i32>* %tmp27, align 4 + %tmp29 = icmp ne <7 x i32> %tmp28, zeroinitializer + %tmp30 = select <7 x i1> %tmp23, <7 x i1> %tmp26, <7 x i1> %tmp29 + %tmp31 = zext <7 x i1> %tmp30 to <7 x i32> + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[bres1:%.*]] = fcmp fast oeq <7 x float> [[ld0]], [[ld1]] + ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32> + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + %tmp33 = load <7 x float>, <7 x float>* %tmp32, align 4 + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + 
%tmp35 = load <7 x float>, <7 x float>* %tmp34, align 4 + %tmp36 = fcmp fast oeq <7 x float> %tmp33, %tmp35 + %tmp37 = zext <7 x i1> %tmp36 to <7 x i32> + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[bres2:%.*]] = fcmp fast une <7 x float> [[ld1]], [[ld2]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + %tmp39 = load <7 x float>, <7 x float>* %tmp38, align 4 + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp41 = load <7 x float>, <7 x float>* %tmp40, align 4 + %tmp42 = fcmp fast une <7 x float> %tmp39, %tmp41 + %tmp43 = zext <7 x i1> %tmp42 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[bres3:%.*]] = fcmp fast olt <7 x float> [[ld2]], [[ld3]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp44 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp45 = load <7 x float>, <7 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp47 = load <7 x float>, <7 x float>* %tmp46, align 4 + %tmp48 = fcmp fast olt <7 x float> %tmp45, %tmp47 + %tmp49 = zext <7 x i1> %tmp48 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[bres4:%.*]] = fcmp fast ogt <7 x float> [[ld3]], [[ld4]] + ; CHECK: [[res4:%.*]] = zext <7 x i1> [[bres4]] to <7 x i32> + %tmp50 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp51 = load <7 x float>, <7 x float>* %tmp50, align 4 + %tmp52 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + %tmp53 = load <7 x float>, <7 x float>* %tmp52, align 4 + %tmp54 = fcmp fast ogt <7 x float> %tmp51, %tmp53 + %tmp55 = zext <7 x i1> %tmp54 to <7 x i32> + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[bres5:%.*]] = fcmp fast ole <7 x float> [[ld4]], [[ld5]] + ; CHECK: [[res5:%.*]] = zext <7 x i1> [[bres5]] to <7 x i32> + %tmp56 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* 
%consequences, i32 0, i32 4 + %tmp57 = load <7 x float>, <7 x float>* %tmp56, align 4 + %tmp58 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp59 = load <7 x float>, <7 x float>* %tmp58, align 4 + %tmp60 = fcmp fast ole <7 x float> %tmp57, %tmp59 + %tmp61 = zext <7 x i1> %tmp60 to <7 x i32> + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[bres6:%.*]] = fcmp fast oge <7 x float> [[ld5]], [[ld6]] + ; CHECK: [[res6:%.*]] = zext <7 x i1> [[bres6]] to <7 x i32> + %tmp62 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp63 = load <7 x float>, <7 x float>* %tmp62, align 4 + %tmp64 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + %tmp65 = load <7 x float>, <7 x float>* %tmp64, align 4 + %tmp66 = fcmp fast oge <7 x float> %tmp63, %tmp65 + %tmp67 = zext <7 x i1> %tmp66 to <7 x i32> + + %tmp68 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 0 + store <7 x i32> %tmp4, <7 x i32>* %tmp68 + %tmp69 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 1 + store <7 x i32> %tmp12, <7 x i32>* %tmp69 + %tmp70 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 2 + store <7 x i32> %tmp20, <7 x i32>* %tmp70 + %tmp71 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 3 + store <7 x i32> %tmp31, <7 x i32>* %tmp71 + %tmp72 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 4 + store <7 x i32> %tmp37, <7 x i32>* %tmp72 + %tmp73 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 5 + store <7 x i32> %tmp43, <7 x i32>* %tmp73 + %tmp74 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 6 + store <7 x i32> %tmp49, <7 x i32>* %tmp74 + %tmp75 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 7 + store <7 x i32> %tmp55, <7 x i32>* %tmp75 + %tmp76 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 8 + store <7 x i32> %tmp61, <7 x i32>* %tmp76 + %tmp77 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 9 + store <7 x i32> %tmp67, <7 x i32>* %tmp77 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$06@@Y09V1@H@Z"([10 x <7 x float>]* noalias sret %agg.result, [10 x <7 x float>]* %things, i32 %i) #0 { +bb: + %res = alloca [10 x <7 x float>], align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + ; CHECK: store <7 x float> zeroinitializer, <7 x float>* [[adr0]], align 4 + %tmp1 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + store <7 x float> zeroinitializer, <7 x float>* %tmp1, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i + ; CHECK: store <7 x float> , <7 x float>* [[adri]], align 4 + %tmp2 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* 
%res, i32 0, i32 %i + store <7 x float> , <7 x float>* %tmp2, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + ; CHECK: store <7 x float> , <7 x float>* [[adr2]], align 4 + %tmp3 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + store <7 x float> , <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res3:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp4 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + %tmp5 = load <7 x float>, <7 x float>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i + ; CHECK: [[res4:%.*]] = load <7 x float>, <7 x float>* [[adri]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + store <7 x float> %tmp8, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[res5:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp10 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + store <7 x float> %tmp11, <7 x float>* %tmp12, align 4 + + %tmp13 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 0 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + %tmp15 = load <7 x float>, <7 x float>* %tmp14 + store <7 x float> %tmp15, <7 x float>* %tmp13 + + %tmp16 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 1 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 1 + %tmp18 = load <7 x float>, <7 x float>* %tmp17 + store <7 x float> %tmp18, <7 x float>* %tmp16 + + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 2 + %tmp20 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + %tmp21 = load <7 x float>, <7 x float>* %tmp20 + store <7 x float> %tmp21, <7 x float>* %tmp19 + + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + %tmp24 = load <7 x float>, <7 x float>* %tmp23 + store <7 x float> %tmp24, <7 x float>* %tmp22 + 
+ %tmp25 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 4 + %tmp26 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + %tmp27 = load <7 x float>, <7 x float>* %tmp26 + store <7 x float> %tmp27, <7 x float>* %tmp25 + + %tmp28 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 5 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29 + store <7 x float> %tmp30, <7 x float>* %tmp28 + + %tmp31 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 6 + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 6 + %tmp33 = load <7 x float>, <7 x float>* %tmp32 + store <7 x float> %tmp33, <7 x float>* %tmp31 + + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 7 + %tmp35 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 7 + %tmp36 = load <7 x float>, <7 x float>* %tmp35 + store <7 x float> %tmp36, <7 x float>* %tmp34 + + %tmp37 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 8 + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 8 + %tmp39 = load <7 x float>, <7 x float>* %tmp38 + store <7 x float> %tmp39, <7 x float>* %tmp37 + + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 9 + %tmp41 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 9 + %tmp42 = load <7 x float>, <7 x float>* %tmp41 + store <7 x float> %tmp42, <7 x float>* %tmp40 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAV?$vector@I$06@@@Z"([11 x <7 x i32>]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor <7 x i32> [[ld1]], + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + ; CHECK: store <7 x i32> [[res0]], <7 x i32>* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4 + %tmp2 = xor <7 x i32> %tmp1, + %tmp3 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + store <7 x i32> %tmp2, <7 x i32>* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or <7 x i32> [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: store <7 x i32> [[res1]], <7 x i32>* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + %tmp5 = load <7 x i32>, <7 x i32>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp7 = load <7 x i32>, <7 x i32>* %tmp6, align 4 + %tmp8 = or <7 x 
i32> %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + store <7 x i32> %tmp8, <7 x i32>* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and <7 x i32> [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: store <7 x i32> [[res2]], <7 x i32>* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp11 = load <7 x i32>, <7 x i32>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp13 = load <7 x i32>, <7 x i32>* %tmp12, align 4 + %tmp14 = and <7 x i32> %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + store <7 x i32> %tmp14, <7 x i32>* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor <7 x i32> [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: store <7 x i32> [[res3]], <7 x i32>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp19 = load <7 x i32>, <7 x i32>* %tmp18, align 4 + %tmp20 = xor <7 x i32> %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + store <7 x i32> %tmp20, <7 x i32>* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[shv6:%.*]] = and <7 x i32> [[ld6]], + ; CHECK: [[res4:%.*]] = shl <7 x i32> [[ld5]], [[shv6]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: store <7 x i32> [[res4]], <7 x i32>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp23 = load <7 x i32>, <7 x i32>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = and <7 x i32> %tmp25, + %tmp27 = shl <7 x i32> %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + store <7 x i32> %tmp27, <7 x i32>* %tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: 
[[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[shv7:%.*]] = and <7 x i32> [[ld7]], + ; CHECK: [[res5:%.*]] = lshr <7 x i32> [[ld6]], [[shv7]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: store <7 x i32> [[res5]], <7 x i32>* [[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp30 = load <7 x i32>, <7 x i32>* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp32 = load <7 x i32>, <7 x i32>* %tmp31, align 4 + %tmp33 = and <7 x i32> %tmp32, + %tmp34 = lshr <7 x i32> %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + store <7 x i32> %tmp34, <7 x i32>* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or <7 x i32> [[ld6]], [[ld8]] + ; CHECK: store <7 x i32> [[res6]], <7 x i32>* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp37 = load <7 x i32>, <7 x i32>* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp39 = load <7 x i32>, <7 x i32>* %tmp38, align 4 + %tmp40 = or <7 x i32> %tmp39, %tmp37 + store <7 x i32> %tmp40, <7 x i32>* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x i32>, <7 x i32>* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and <7 x i32> [[ld7]], [[ld9]] + ; CHECK: store <7 x i32> [[res7]], <7 x i32>* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + %tmp42 = load <7 x i32>, <7 x i32>* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp44 = load <7 x i32>, <7 x i32>* %tmp43, align 4 + %tmp45 = and <7 x i32> %tmp44, %tmp42 + store <7 x i32> %tmp45, <7 x i32>* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x i32>, <7 x i32>* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor <7 x i32> [[ld8]], [[ld10]] + ; CHECK: store <7 x i32> [[res8]], <7 x i32>* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + %tmp47 = load <7 x i32>, <7 x i32>* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp49 = load <7 x i32>, <7 x i32>* %tmp48, align 4 + %tmp50 = xor <7 x 
i32> %tmp49, %tmp47
+ store <7 x i32> %tmp50, <7 x i32>* %tmp48, align 4
+
+ ret void
+}
+
+declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
+declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32, %"class.RWStructuredBuffer") #1
+declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
+
+!dx.version = !{!3}
+
+!3 = !{i32 1, i32 9}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
new file mode 100644
index 0000000000..9734b85b12
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -0,0 +1,745 @@
+; RUN: %dxopt %s -scalarizer -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%"class.RWStructuredBuffer<vector<float, 1> >" = type { <1 x float> }
+%dx.types.Handle = type { i8* }
+%dx.types.ResourceProperties = type { i32, i32 }
+%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
+
+@"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" = external global %"class.RWStructuredBuffer<vector<float, 1> >", align 4
+@llvm.used = appending global [1 x i8*] [i8* bitcast (%"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" to i8*)], section "llvm.metadata"
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?assignments
+define void @"\01?assignments@@YAXY09$$CAV?$vector@M$00@@@Z"([10 x <1 x float>]* noalias %things) #0 {
+bb:
+ %tmp = load %"class.RWStructuredBuffer<vector<float, 1> >", %"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A"
+ %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer<vector<float, 1> >"(i32 160, %"class.RWStructuredBuffer<vector<float, 1> >" %tmp)
+ %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 })
+ %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4)
+ %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0
+ %tmp4 = insertelement <1 x float> undef, float %tmp3, i64 0
+ %tmp5 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0
+ store <1 x float> %tmp4, <1 x float>* %tmp5, align 4
+
+ ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+ ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]]
+ ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+ ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1
+ ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]]
+ ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0
+ ; CHECK: [[res1:%.*]] = fadd fast float [[val1]], [[val5]]
+ ; CHECK: [[vec1:%.*]] = insertelement <1 x float> undef, float [[res1]], i32 0
+ ; CHECK: store <1 x float> [[vec1]], <1 x float>* [[adr1]], align 4
+ %tmp6 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+ %tmp7 = load <1 x float>, <1 x float>* %tmp6, align 4
+ %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1
+ %tmp9 = load <1 x float>, <1 x float>* %tmp8,
align 4 + %tmp10 = fadd fast <1 x float> %tmp9, %tmp7 + store <1 x float> %tmp10, <1 x float>* %tmp8, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[res2:%.*]] = fsub fast float [[val2]], [[val6]] + ; CHECK: [[vec2:%.*]] = insertelement <1 x float> undef, float [[res2]], i32 0 + ; CHECK: store <1 x float> [[vec2]], <1 x float>* [[adr2]], align 4 + %tmp11 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + %tmp12 = load <1 x float>, <1 x float>* %tmp11, align 4 + %tmp13 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = fsub fast <1 x float> %tmp14, %tmp12 + store <1 x float> %tmp15, <1 x float>* %tmp13, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]] + ; CHECK: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[res3:%.*]] = fmul fast float [[val3]], [[val7]] + ; CHECK: [[vec3:%.*]] = insertelement <1 x float> undef, float [[res3]], i32 0 + ; CHECK: store <1 x float> [[vec3]], <1 x float>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + %tmp17 = load <1 x float>, <1 x float>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + %tmp19 = load <1 x float>, <1 x float>* %tmp18, align 4 + %tmp20 = fmul fast <1 x float> %tmp19, %tmp17 + store <1 x float> %tmp20, <1 x float>* %tmp18, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]] + ; CHECK: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[res4:%.*]] = fdiv fast float [[val4]], [[val8]] + ; CHECK: [[vec4:%.*]] = insertelement <1 x float> undef, float [[res4]], i32 0 + ; CHECK: store <1 x float> [[vec4]], <1 x float>* [[adr4]], align 4 + %tmp21 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + %tmp24 = load <1 x float>, <1 x float>* %tmp23, align 4 + %tmp25 = fdiv fast <1 x float> %tmp24, %tmp22 + store <1 x float> %tmp25, <1 x float>* %tmp23, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, 
i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]] + ; CHECK: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[res5:%.*]] = frem fast float [[val5]], [[val9]] + ; CHECK: [[vec5:%.*]] = insertelement <1 x float> undef, float [[res5]], i32 0 + ; CHECK: store <1 x float> [[vec5]], <1 x float>* [[adr5]], align 4 + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9 + %tmp27 = load <1 x float>, <1 x float>* %tmp26, align 4 + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + %tmp29 = load <1 x float>, <1 x float>* %tmp28, align 4 + %tmp30 = frem fast <1 x float> %tmp29, %tmp27 + store <1 x float> %tmp30, <1 x float>* %tmp28, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z"([11 x <1 x float>]* noalias sret %agg.result, [11 x <1 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4 + ; CHECK-DAG: [[zero:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK-DAG: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[sub0:%.*]] = fsub fast float [[zero]], [[val0]] + ; CHECK: [[res0:%.*]] = insertelement <1 x float> undef, float [[sub0]], i32 0 + %tmp = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp1 = load <1 x float>, <1 x float>* %tmp, align 4 + %tmp2 = fsub fast <1 x float> , %tmp1 + %tmp3 = extractelement <1 x float> %tmp2, i64 0 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[res1]], i64 0 + %tmp4 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = extractelement <1 x float> %tmp5, i64 0 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]], align 4 + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[add1:%.*]] = fadd fast float [[val1]], [[val2]] + ; CHECK: [[res1:%.*]] = insertelement <1 x float> undef, float [[add1]], i32 0 + %tmp7 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + %tmp8 = load <1 x float>, <1 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp10 = load <1 x float>, <1 x float>* %tmp9, align 4 + %tmp11 = fadd fast <1 x float> %tmp8, %tmp10 + %tmp12 = extractelement <1 x float> %tmp11, i64 0 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], 
[11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[sub2:%.*]] = fsub fast float [[val2]], [[val3]] + ; CHECK: [[res2:%.*]] = insertelement <1 x float> undef, float [[sub2]], i32 0 + %tmp13 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp16 = load <1 x float>, <1 x float>* %tmp15, align 4 + %tmp17 = fsub fast <1 x float> %tmp14, %tmp16 + %tmp18 = extractelement <1 x float> %tmp17, i64 0 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[mul3:%.*]] = fmul fast float [[val3]], [[val4]] + ; CHECK: [[res3:%.*]] = insertelement <1 x float> undef, float [[mul3]], i32 0 + %tmp19 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp20 = load <1 x float>, <1 x float>* %tmp19, align 4 + %tmp21 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = fmul fast <1 x float> %tmp20, %tmp22 + %tmp24 = extractelement <1 x float> %tmp23, i64 0 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[div4:%.*]] = fdiv fast float [[val4]], [[val5]] + ; CHECK: [[res4:%.*]] = insertelement <1 x float> undef, float [[div4]], i32 0 + %tmp25 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp26 = load <1 x float>, <1 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp28 = load <1 x float>, <1 x float>* %tmp27, align 4 + %tmp29 = fdiv fast <1 x float> %tmp26, %tmp28 + %tmp30 = extractelement <1 x float> %tmp29, i64 0 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]], align 4 + ; CHECK: 
[[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[rem5:%.*]] = frem fast float [[val5]], [[val6]] + ; CHECK: [[res5:%.*]] = insertelement <1 x float> undef, float [[rem5]], i32 0 + %tmp31 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp32 = load <1 x float>, <1 x float>* %tmp31, align 4 + %tmp33 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + %tmp34 = load <1 x float>, <1 x float>* %tmp33, align 4 + %tmp35 = frem fast <1 x float> %tmp32, %tmp34 + %tmp36 = extractelement <1 x float> %tmp35, i64 0 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]], align 4 + ; CHECK-DAG: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add6:%.*]] = fadd fast float [[val7]], [[pos1]] + ; CHECK: [[res6:%.*]] = insertelement <1 x float> undef, float [[add6]], i32 0 + %tmp37 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = fadd fast <1 x float> %tmp38, + store <1 x float> %tmp39, <1 x float>* %tmp37, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]], align 4 + ; CHECK-DAG: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add7:%.*]] = fadd fast float [[val8]], [[neg1]] + ; CHECK: [[res7:%.*]] = insertelement <1 x float> undef, float [[add7]], i32 0 + %tmp40 = extractelement <1 x float> %tmp38, i64 0 + %tmp41 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + %tmp42 = load <1 x float>, <1 x float>* %tmp41, align 4 + %tmp43 = fadd fast <1 x float> %tmp42, + store <1 x float> %tmp43, <1 x float>* %tmp41, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]], align 4 + ; CHECK-DAG: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add8:%.*]] = fadd fast float [[val9]], [[pos1]] + ; CHECK: [[res8:%.*]] = insertelement <1 x float> undef, float [[add8]], i32 0 + %tmp44 = extractelement <1 x float> %tmp42, i64 0 + %tmp45 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + %tmp46 = load <1 x float>, <1 x float>* %tmp45, align 4 + %tmp47 = fadd fast <1 x float> %tmp46, + store <1 x float> %tmp47, <1 x float>* %tmp45, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <1 x float>, <1 x float>* [[adr10]], align 4 + ; CHECK-DAG: [[val10:%.*]] = extractelement <1 x float> [[ld10]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add9:%.*]] = fadd fast float [[val10]], [[neg1]] + ; CHECK: [[res9:%.*]] = insertelement <1 x float> undef, float [[add9]], i32 0 + %tmp48 = extractelement <1 x float> %tmp47, i64 0 + %tmp49 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + %tmp50 = load <1 x float>, <1 x float>* %tmp49, align 4 + %tmp51 
= fadd fast <1 x float> %tmp50, + store <1 x float> %tmp51, <1 x float>* %tmp49, align 4 + + %tmp52 = extractelement <1 x float> %tmp51, i64 0 + %tmp53 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 0 + %insert20 = insertelement <1 x float> undef, float %tmp3, i64 0 + store <1 x float> %insert20, <1 x float>* %tmp53 + %tmp54 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 1 + %insert18 = insertelement <1 x float> undef, float %tmp6, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp54 + %tmp55 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 2 + %insert16 = insertelement <1 x float> undef, float %tmp12, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp55 + %tmp56 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 3 + %insert14 = insertelement <1 x float> undef, float %tmp18, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp56 + %tmp57 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 4 + %insert12 = insertelement <1 x float> undef, float %tmp24, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp57 + %tmp58 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 5 + %insert10 = insertelement <1 x float> undef, float %tmp30, i64 0 + store <1 x float> %insert10, <1 x float>* %tmp58 + %tmp59 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 6 + %insert8 = insertelement <1 x float> undef, float %tmp36, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp59 + %tmp60 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 7 + %insert6 = insertelement <1 x float> undef, float %tmp40, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp60 + %tmp61 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 8 + %insert4 = insertelement <1 x float> undef, float %tmp44, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp61 + %tmp62 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 9 + %insert2 = insertelement <1 x float> undef, float %tmp48, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp62 + %tmp63 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 10 + %insert = insertelement <1 x float> undef, float %tmp52, i64 0 + store <1 x float> %insert, <1 x float>* %tmp63 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?logic +define void @"\01?logic@@YA$$BY09_NY09_NY09V?$vector@M$00@@@Z"([10 x i32]* noalias sret %agg.result, [10 x i32]* %truth, [10 x <1 x float>]* %consequences) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load i32, i32* [[adr0]], align 4 + ; CHECK: [[cmp0:%.*]] = icmp ne i32 [[ld0]], 0 + ; CHECK: [[bres0:%.*]] = xor i1 [[cmp0]], true + ; CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + %tmp = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = icmp ne i32 %tmp1, 0 + %tmp3 = xor i1 %tmp2, true + %tmp4 = zext i1 %tmp3 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4 + ; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 
x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[bres1:%.*]] = or i1 [[cmp1]], [[cmp2]] + ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + %tmp6 = load i32, i32* %tmp5, align 4 + %tmp7 = icmp ne i32 %tmp6, 0 + %tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp10 = load i32, i32* %tmp9, align 4 + %tmp11 = icmp ne i32 %tmp10, 0 + %tmp13 = or i1 %tmp7, %tmp11 + %tmp14 = zext i1 %tmp13 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[bres2:%.*]] = and i1 [[cmp2]], [[cmp3]] + ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = icmp ne i32 %tmp16, 0 + %tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp20 = load i32, i32* %tmp19, align 4 + %tmp21 = icmp ne i32 %tmp20, 0 + %tmp23 = and i1 %tmp17, %tmp21 + %tmp24 = zext i1 %tmp23 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[cmp4:%.*]] = icmp ne i32 [[ld4]], 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[cmp5:%.*]] = icmp ne i32 [[ld5]], 0 + ; CHECK: [[bres3:%.*]] = select i1 [[cmp3]], i1 [[cmp4]], i1 [[cmp5]] + ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + %tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp26 = load i32, i32* %tmp25, align 4 + %tmp27 = icmp ne i32 %tmp26, 0 + %tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = icmp ne i32 %tmp30, 0 + %tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + %tmp33 = load i32, i32* %tmp32, align 4 + %tmp34 = icmp ne i32 %tmp33, 0 + %tmp35 = select i1 %tmp27, i1 %tmp31, i1 %tmp34 + %tmp36 = zext i1 %tmp35 to i32 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[bres4:%.*]] = fcmp fast oeq float [[val0]], [[val1]] + ; CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + %tmp37 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = 
getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp40 = load <1 x float>, <1 x float>* %tmp39, align 4 + %tmp41 = fcmp fast oeq <1 x float> %tmp38, %tmp40 + %tmp42 = extractelement <1 x i1> %tmp41, i64 0 + %tmp43 = zext i1 %tmp42 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[bres5:%.*]] = fcmp fast une float [[val1]], [[val2]] + ; CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + %tmp44 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp45 = load <1 x float>, <1 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp47 = load <1 x float>, <1 x float>* %tmp46, align 4 + %tmp48 = fcmp fast une <1 x float> %tmp45, %tmp47 + %tmp49 = extractelement <1 x i1> %tmp48, i64 0 + %tmp50 = zext i1 %tmp49 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[bres6:%.*]] = fcmp fast olt float [[val2]], [[val3]] + ; CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + %tmp51 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp52 = load <1 x float>, <1 x float>* %tmp51, align 4 + %tmp53 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp54 = load <1 x float>, <1 x float>* %tmp53, align 4 + %tmp55 = fcmp fast olt <1 x float> %tmp52, %tmp54 + %tmp56 = extractelement <1 x i1> %tmp55, i64 0 + %tmp57 = zext i1 %tmp56 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[bres7:%.*]] = fcmp fast ogt float [[val3]], [[val4]] + ; CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + %tmp58 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp59 = load <1 x float>, <1 x float>* %tmp58, align 4 + %tmp60 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp61 = load <1 x float>, <1 x float>* %tmp60, align 4 + %tmp62 = fcmp fast ogt <1 x float> %tmp59, %tmp61 + %tmp63 = extractelement <1 x i1> %tmp62, i64 0 + %tmp64 = zext i1 %tmp63 to i32 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds 
[10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[bres8:%.*]] = fcmp fast ole float [[val4]], [[val5]] + ; CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32 + %tmp65 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp66 = load <1 x float>, <1 x float>* %tmp65, align 4 + %tmp67 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp68 = load <1 x float>, <1 x float>* %tmp67, align 4 + %tmp69 = fcmp fast ole <1 x float> %tmp66, %tmp68 + %tmp70 = extractelement <1 x i1> %tmp69, i64 0 + %tmp71 = zext i1 %tmp70 to i32 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[bres9:%.*]] = fcmp fast oge float [[val5]], [[val6]] + ; CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + %tmp72 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp73 = load <1 x float>, <1 x float>* %tmp72, align 4 + %tmp74 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + %tmp75 = load <1 x float>, <1 x float>* %tmp74, align 4 + %tmp76 = fcmp fast oge <1 x float> %tmp73, %tmp75 + %tmp77 = extractelement <1 x i1> %tmp76, i64 0 + %tmp78 = zext i1 %tmp77 to i32 + + %tmp79 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + store i32 %tmp4, i32* %tmp79 + %tmp80 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + store i32 %tmp14, i32* %tmp80 + %tmp81 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + store i32 %tmp24, i32* %tmp81 + %tmp82 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + store i32 %tmp36, i32* %tmp82 + %tmp83 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + store i32 %tmp43, i32* %tmp83 + %tmp84 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + store i32 %tmp50, i32* %tmp84 + %tmp85 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + store i32 %tmp57, i32* %tmp85 + %tmp86 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + store i32 %tmp64, i32* %tmp86 + %tmp87 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + store i32 %tmp71, i32* %tmp87 + %tmp88 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + store i32 %tmp78, i32* %tmp88 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$00@@Y09V1@H@Z"([10 x <1 x float>]* noalias sret %agg.result, [10 x <1 x float>]* %things, i32 %i) #0 { +bb: + ; CHECK: %res.0 = alloca [10 x float] + %res.0 = alloca 
[10 x float] + + ; CHECK: [[adr0:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + ; CHECK: store float 0.000000e+00, float* [[adr0]] + %tmp1 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + store float 0.000000e+00, float* %tmp1 + + ; CHECK: [[adri:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + ; CHECK: store float 1.000000e+00, float* [[adri]] + %tmp2 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + store float 1.000000e+00, float* %tmp2 + + ; CHECK: [[adr2:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + ; CHECK: store float 2.000000e+00, float* [[adr2]] + %tmp3 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + store float 2.000000e+00, float* %tmp3 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[adr3:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i64 0 + ; CHECK: store float [[val0]], float* [[adr3]] + %tmp4 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %tmp7 = extractelement <1 x float> %tmp5, i64 0 + store float %tmp7, float* %tmp6 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + ; CHECK: [[ldi:%.*]] = load <1 x float>, <1 x float>* [[adri]] + ; CHECK: [[adr4:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + ; CHECK: [[vali:%.*]] = extractelement <1 x float> [[ldi]], i64 0 + ; CHECK: store float [[vali]], float* [[adr4]] + %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4 + %tmp10 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %tmp11 = extractelement <1 x float> %tmp9, i64 0 + store float %tmp11, float* %tmp10 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[adr5:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i64 0 + ; CHECK: store float [[val2]], float* [[adr5]] + %tmp12 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp13 = load <1 x float>, <1 x float>* %tmp12, align 4 + %tmp14 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %tmp15 = extractelement <1 x float> %tmp13, i64 0 + store float %tmp15, float* %tmp14 + + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 0 + %tmp17 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + %load17 = load float, float* %tmp17 + %insert18 = insertelement <1 x float> undef, float %load17, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp16 + + %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 1 + %tmp19 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 1 + %load15 = load float, float* %tmp19 + %insert16 = insertelement <1 x float> undef, float %load15, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp18 + + 
%tmp20 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 2 + %tmp21 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + %load13 = load float, float* %tmp21 + %insert14 = insertelement <1 x float> undef, float %load13, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp20 + + %tmp22 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %load11 = load float, float* %tmp23 + %insert12 = insertelement <1 x float> undef, float %load11, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp22 + + %tmp24 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 4 + %tmp25 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %load9 = load float, float* %tmp25 + %insert10 = insertelement <1 x float> undef, float %load9, i64 0 + store <1 x float> %insert10, <1 x float>* %tmp24 + + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 5 + %tmp27 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %load7 = load float, float* %tmp27 + %insert8 = insertelement <1 x float> undef, float %load7, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp26 + + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 6 + %tmp29 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 6 + %load5 = load float, float* %tmp29 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp28 + + %tmp30 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 7 + %tmp31 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 7 + %load3 = load float, float* %tmp31 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp30 + + %tmp32 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 8 + %tmp33 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 8 + %load1 = load float, float* %tmp33 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp32 + + %tmp34 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 9 + %tmp35 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 9 + %load = load float, float* %tmp35 + %insert = insertelement <1 x float> undef, float %load, i64 0 + store <1 x float> %insert, <1 x float>* %tmp34 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAI@Z"([11 x i32]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor i32 [[ld1]], -1 + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + ; CHECK: store i32 [[res0]], i32* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = xor i32 %tmp1, -1 + %tmp3 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + store i32 %tmp2, i32* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load 
i32, i32* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: store i32 [[res1]], i32* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp7 = load i32, i32* %tmp6, align 4 + %tmp8 = or i32 %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + store i32 %tmp8, i32* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: store i32 [[res2]], i32* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp11 = load i32, i32* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = and i32 %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + store i32 %tmp14, i32* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: store i32 [[res3]], i32* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp17 = load i32, i32* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = xor i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + store i32 %tmp20, i32* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[and4:%.*]] = and i32 [[ld6]], 31 + ; CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[and4]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: store i32 [[res4]], i32* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp23 = load i32, i32* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp25 = load i32, i32* %tmp24, align 4 + %tmp26 = and i32 %tmp25, 31 + %tmp27 = shl i32 %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + store i32 %tmp27, i32* 
%tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[and5:%.*]] = and i32 [[ld7]], 31 + ; CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[and5]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: store i32 [[res5]], i32* [[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp32 = load i32, i32* %tmp31, align 4 + %tmp33 = and i32 %tmp32, 31 + %tmp34 = lshr i32 %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + store i32 %tmp34, i32* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + ; CHECK: store i32 [[res6]], i32* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp37 = load i32, i32* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp39 = load i32, i32* %tmp38, align 4 + %tmp40 = or i32 %tmp39, %tmp37 + store i32 %tmp40, i32* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]] + ; CHECK: store i32 [[res7]], i32* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + %tmp42 = load i32, i32* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp44 = load i32, i32* %tmp43, align 4 + %tmp45 = and i32 %tmp44, %tmp42 + store i32 %tmp45, i32* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]] + ; CHECK: store i32 [[res8]], i32* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + %tmp47 = load i32, i32* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp49 = load i32, i32* %tmp48, align 4 + %tmp50 = xor i32 %tmp49, %tmp47 + store i32 %tmp50, i32* %tmp48, align 4 + + ret void +} + +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #2 +declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32, %"class.RWStructuredBuffer >") #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + 
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!3}
+!3 = !{i32 1, i32 9}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl
new file mode 100644
index 0000000000..66382af2d5
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl
@@ -0,0 +1,425 @@
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Mainly a source for the vec1 scalarizer IR test.
+// Serves to verify some codegen as well.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// Need to capture once for the full vector type, again for the element type.
+// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:<[0-9]* x [a-z0-9_]*>]] }
+// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { <{{[0-9]*}} x [[ELTY:[a-z0-9_]*]]> }
+RWStructuredBuffer<TYPE> buf;
+
+export void assignments(inout TYPE things[10], TYPE scales[10]);
+export TYPE arithmetic(inout TYPE things[11])[11];
+export bool logic(bool truth[10], TYPE consequences[10])[10];
+export TYPE index(TYPE things[10], int i, TYPE val)[10];
+
+// Test assignment operators.
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1) + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] [[TYPE]] [[vec1]], [[vec5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] [[TYPE]] [[vec2]], [[vec6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] [[TYPE]] [[vec3]], [[vec7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[TYPE]] [[vec4]], [[vec8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // NODBL: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[vec5]], [[vec9]] + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res0:%.*]] = [[SUB]] [[TYPE]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 6 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = 
[[ADD]] [[TYPE]] [[vec7]], <[[ELTY]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]]> + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 7 + // CHECK: store [[TYPE]] [[vec7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[vec8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 8 + // CHECK: store [[TYPE]] [[vec8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[vec9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[vec10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[vec10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [11 x [[TYPE]]]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [11 x [[TYPE]]]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], TYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne i32 [[vec0]], 0 + // CHECK: [[bres0:%.*]] = xor i1 [[bvec0]], true + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 0 + // CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + // CHECK: store i32 [[res0]], i32* [[adr0]] + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne i32 [[vec1]], 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[bres1:%.*]] = or i1 [[bvec1]], [[bvec2]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 1 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + // CHECK: store i32 [[res1]], i32* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + // CHECK: store i32 [[res2]], i32* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne i32 [[vec4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne i32 [[vec5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + // CHECK: store i32 [[res3]], i32* [[adr3]] + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[vec0]], [[vec1]] + // CHECK: [[bres4:%.*]] = extractelement <1 x i1> [[cmp4]], i64 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + // CHECK: store i32 [[res4]], i32* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[bres5:%.*]] = extractelement <1 x i1> [[cmp5]], i64 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + // CHECK: store i32 [[res5]], i32* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[bres6:%.*]] = extractelement <1 x i1> [[cmp6]], i64 0 + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + // CHECK: store i32 [[res6]], i32* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[bres7:%.*]] = extractelement <1 x i1> [[cmp7]], i64 0 + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + // CHECK: store i32 [[res7]], i32* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[bres8:%.*]] = extractelement <1 x i1> [[cmp8]], i64 0 + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 8 + // CHECK: 
[[res8:%.*]] = zext i1 [[bres8]] to i32 + // CHECK: store i32 [[res8]], i32* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[vec5]], [[vec6]] + // CHECK: [[bres9:%.*]] = extractelement <1 x i1> [[cmp9]], i64 0 + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + // CHECK: store i32 [[res9]], i32* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [10 x i32]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x i32]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + TYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] zeroinitializer, [[TYPE]]* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 [[i]] + // CHECK: store [[TYPE]] <[[ELTY]] {{(1|1\.0*e\+0*|0xH3C00).*}}>, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] <[[ELTY]] {{(2|2\.0*e\+0*|0xH4000).*}}>, [[TYPE]]* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 [[i]] + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[res4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +// Test bit twiddling operators. 
+// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout uint things[11]) { + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[res1:%.*]] = xor i32 [[ld1]], -1 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + // CHECK: store i32 [[res1]], i32* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + // CHECK: store i32 [[res1]], i32* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[shv6:%.*]] = and i32 [[ld6]], 31 + // CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[shv6]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]] + // CHECK: [[shv7:%.*]] = and i32 [[ld7]], 31 + // CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[shv7]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + // CHECK: store i32 [[res6]], i32* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds 
[11 x i32], [11 x i32]* %things, i32 0, i32 9
+ // CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]]
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+ // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]]
+ // CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]]
+ // CHECK: store i32 [[res7]], i32* [[adr7]]
+ things[7] &= things[9];
+
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10
+ // CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]]
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+ // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]]
+ // CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]]
+ // CHECK: store i32 [[res8]], i32* [[adr8]]
+ things[8] ^= things[10];
+
+ // CHECK: ret void
+}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
new file mode 100644
index 0000000000..2c2ef01b8a
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
@@ -0,0 +1,420 @@
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=16 -DDBL %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=34 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=129 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Mainly a source for the longvec scalarizer IR test.
+// Serves to verify some codegen as well.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:[a-z0-9]*]] }
+// CHECK: external global {{\[}}[[NUM:[0-9]*]] x %"class.RWStructuredBuffer
+RWStructuredBuffer<TYPE> buf[NUM];
+
+
+// Test assignment operators.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout vector<TYPE, NUM> things[10]) {
+
+ // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1)
+ // CHECK: [[vec0:%.*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[res0]], i32 0
+ // CHECK: [[res0:%.*]] = shufflevector <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[adr0]]
+ things[0] = buf[0].Load(1);
+
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+ // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+ // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+ // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]]
+ things[1] += things[5];
+
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+ // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+ things[2] -= things[6];
+
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+ // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+ // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+ // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+ things[3] *= things[7];
+
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+ // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+ // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+ // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+ things[4] /= things[8];
+
+#ifndef DBL
+ // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+ // NODBL: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+ // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+ // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+ // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]]
+ // NODBL: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+ things[5] %= things[9];
+#endif
+}
+
+// Test arithmetic operators.
+// CHECK-LABEL: define void @"\01?arithmetic
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] {
+ vector<TYPE, NUM> res[11];
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+ // CHECK: [[res1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]]
+ // CHECK: [[res0:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]>
+ res[0] = -things[0];
+ res[1] = +things[0];
+
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+ // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[res2:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 2
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+ res[2] = things[1] + things[2];
+
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+ // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+ // CHECK: [[res3:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 3
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+ res[3] = things[2] - things[3];
+
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+ // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+ // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+ // CHECK: [[res4:%.*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 4
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+ res[4] = things[3] * things[4];
+
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+ // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+ // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+ // CHECK: [[res5:%.*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+ // CHECK: 
[[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 6 + // NODBL: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + // This is a post op, so the original value goes into res[]. 
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 8
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr8]]
+ res[8] = things[8]--;
+
+ // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+ // CHECK: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+ // CHECK: [[res9:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]]
+ // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 9
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]]
+ res[9] = ++things[9];
+
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+ // CHECK: [[vec10:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]]
+ // CHECK: [[res10:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]]
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 10
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]]
+ res[10] = --things[10];
+
+ // Memcpy res into return value.
+ // CHECK: [[retptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %agg.result to i8*
+ // CHECK: [[resptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %res to i8*
+ // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]]
+ // CHECK: ret void
+ return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10] {
+ vector<bool, NUM> res[10];
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+ // CHECK: [[vec0:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr0]]
+ // CHECK: [[bvec0:%.*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+ // CHECK: [[bres0:%.*]] = icmp eq <[[NUM]] x i1> [[bvec0]], zeroinitializer
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 0
+ // CHECK: [[res0:%.*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32>
+ // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[adr0]]
+ res[0] = !truth[0];
+
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+ // CHECK: [[vec1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]]
+ // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]]
+ // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+ // CHECK: [[val1:%.*]] = icmp ne <[[NUM]] x i1> [[bvec1]], zeroinitializer
+ // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer
+ // CHECK: [[bres1:%.*]] = or <[[NUM]] x i1> [[val1]], [[val2]]
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 1
+ // CHECK: [[res1:%.*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+ // CHECK: store 
<[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer + // CHECK: [[val3:%.*]] = icmp ne <[[NUM]] x i1> [[bvec3]], zeroinitializer + // CHECK: [[bres2:%.*]] = and <[[NUM]] x i1> [[val2]], [[val3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres3:%.*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]] + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[bres4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[bres5:%.*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[bres6:%.*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[bres7:%.*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: 
[[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[bres8:%.*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 8 + // CHECK: [[res8:%.*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[bres9:%.*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x <[[NUM]] x [[TYPE]]>] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + vector res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 [[i]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|1\.0*e\+0*|0xH3C00).*}}, <[[NUM]] x [[TYPE]]>* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|2\.0*e\+0*|0xH4000).*}}, <[[NUM]] x [[TYPE]]>* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 
[[i]]
+ // CHECK: [[thgi:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adri]]
+ // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]]
+ res[4] = things[i];
+
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[thg2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]]
+ res[5] = things[Ix];
+ // CHECK: ret void
+ return res;
+}
+
+// Test bit twiddling operators.
+// INT-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout vector things[11]) {
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 1
+ // CHECK: [[ld1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]]
+ // CHECK: [[res1:%.*]] = xor <[[NUM]] x i32> [[ld1]], <i32 -1
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 0
+ // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr0]]
+ things[0] = ~things[1];
+
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+ // CHECK: [[ld2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+ // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+ // CHECK: [[res1:%.*]] = or <[[NUM]] x i32> [[ld2]], [[ld3]]
+ // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]]
+ things[1] = things[2] | things[3];
+
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+ // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+ // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]]
+ // CHECK: [[res2:%.*]] = and <[[NUM]] x i32> [[ld3]], [[ld4]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+ // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]]
+ things[2] = things[3] & things[4];
+
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+ // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]]
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+ // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]]
+ // CHECK: [[res3:%.*]] = xor <[[NUM]] x i32> [[ld4]], [[ld5]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+ // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]]
+ things[3] = things[4] ^ things[5];
+
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+ // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]]
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+ // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+ // CHECK: [[shv6:%.*]] = and <[[NUM]] x i32> [[ld6]], <i32 31
+ // CHECK: [[res4:%.*]] = shl <[[NUM]] x i32> [[ld5]], [[shv6]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+ // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]]
+ things[4] = things[5] << things[6];
+
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+ // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7
+ // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]]
+ // CHECK: [[shv7:%.*]] = and <[[NUM]] x i32> [[ld7]], <i32 31
+ // CHECK: [[res5:%.*]] = lshr <[[NUM]] x i32> [[ld6]], [[shv7]]
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+ // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]]
+ things[5] = things[6] >> things[7];
+
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8
+ // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]]
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+ // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+ // CHECK: [[res6:%.*]] = or <[[NUM]] x i32> [[ld6]], [[ld8]]
+ // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]]
+ things[6] |= things[8];
+
+ // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 9
+ // CHECK: [[ld9:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr9]]
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7
+ // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]]
+ // CHECK: [[res7:%.*]] = and <[[NUM]] x i32> [[ld7]], [[ld9]]
+ // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]]
+ things[7] &= things[9];
+
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 10
+ // CHECK: [[ld10:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr10]]
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8
+ // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]]
+ // CHECK: [[res8:%.*]] = xor <[[NUM]] x i32> [[ld8]], [[ld10]]
+ // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]]
+ things[8] ^= things[10];
+
+ // CHECK: ret void
+}
diff --git a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
index 35fd0d6b1d..d5b0bbb2a7 100644
--- a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
+++ b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
@@ -30,4 +30,3 @@ entry:
 declare float @"\01?foo@@YAMY02V?$vector@M$02@@@Z"([3 x <3 x float>]*)
 
 attributes #0 = { nounwind }
-
diff --git a/tools/clang/unittests/HLSL/LinkerTest.cpp b/tools/clang/unittests/HLSL/LinkerTest.cpp
index 7cafa0db06..df8bb644e1 100644
--- a/tools/clang/unittests/HLSL/LinkerTest.cpp
+++ b/tools/clang/unittests/HLSL/LinkerTest.cpp
@@ -526,6 +526,11 @@ TEST_F(LinkerTest, RunLinkMatArrayParam) {
Link(L"main", L"ps_6_0", pLinker, {libName, libName2}, {"alloca [24 x float]", "getelementptr [12 x float], [12 x float]*"}, {}); + + Link(L"main", L"ps_6_9", pLinker, {libName, libName2}, + {"alloca [2 x <12 x float>]", + "getelementptr [12 x float], [12 x float]*"}, + {}); } TEST_F(LinkerTest, RunLinkMatParam) { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index fc4c427580..e32ab1915a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1184,6 +1184,37 @@ def populate_llvm_instructions(self): self.add_llvm_instr( "OTHER", 53, "VAArg", "VAArgInst", "vaarg instruction", "", [] ) + + self.add_llvm_instr( + "OTHER", + 54, + "ExtractElement", + "ExtractElementInst", + "extracts from vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 55, + "InsertElement", + "InsertElementInst", + "inserts into vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 56, + "ShuffleVector", + "ShuffleVectorInst", + "Shuffle two vectors", + "", + [], + ) + self.add_llvm_instr( "OTHER", 57, From d8aad78191b3f179601babc3183fd7c98f50df17 Mon Sep 17 00:00:00 2001 From: Iago Calvo Lista Date: Wed, 26 Mar 2025 14:12:44 +0000 Subject: [PATCH 50/88] Add support for KHR_compute_shader_derivatives (#7249) Add support for KHR_compute_shader_derivatives - DirectxShaderCompiler already supports `NV_compute_shader_derivatives` which is functionality identical to `KHR_compute_shader_derivatives` - The KHR extension will be used by default instead of the NV one following the same approach as the RT extension. - We currently explain this in a comment in `tools/clang/lib/SPIRV/FeatureManager.cpp` `FeatureManager::enabledByDefault`. - Check commit introducing RT for more info 04a84f05a54949d2075daec656a6a4c0c6829c43 Fixes #7179 --- docs/SPIR-V.rst | 1 + .../include/clang/SPIRV/FeatureManager.h | 1 + tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 6 ++++ tools/clang/lib/SPIRV/FeatureManager.cpp | 8 +++++ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 4 +++ .../test/CodeGenSPIRV/ddx.compute.khr.hlsl | 29 +++++++++++++++++++ ...ture.calculate.lod.compute.linear.khr.hlsl | 23 +++++++++++++++ 7 files changed, 72 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 9a8150a0e8..899b587492 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -315,6 +315,7 @@ Supported extensions * SPV_KHR_fragment_shader_barycentric * SPV_KHR_physical_storage_buffer * SPV_KHR_vulkan_memory_model +* SPV_KHR_compute_shader_derivatives * SPV_NV_compute_shader_derivatives * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 841708d8d5..8a9755ae79 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -59,6 +59,7 @@ enum class Extension { KHR_physical_storage_buffer, KHR_vulkan_memory_model, NV_compute_shader_derivatives, + KHR_compute_shader_derivatives, KHR_fragment_shader_barycentric, KHR_maximal_reconvergence, KHR_float_controls, diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 50a7ab0905..c2b5acff53 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -852,6 +852,12 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { 
spv::Capability::FragmentShaderShadingRateInterlockEXT, }); + addExtensionAndCapabilitiesIfEnabled( + Extension::KHR_compute_shader_derivatives, + { + spv::Capability::ComputeDerivativeGroupQuadsKHR, + spv::Capability::ComputeDerivativeGroupLinearKHR, + }); addExtensionAndCapabilitiesIfEnabled( Extension::NV_compute_shader_derivatives, { diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index c459f7af0f..a8ee1de000 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -215,6 +215,8 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_physical_storage_buffer", Extension::KHR_physical_storage_buffer) .Case("SPV_KHR_vulkan_memory_model", Extension::KHR_vulkan_memory_model) + .Case("SPV_KHR_compute_shader_derivatives", + Extension::KHR_compute_shader_derivatives) .Case("SPV_NV_compute_shader_derivatives", Extension::NV_compute_shader_derivatives) .Case("SPV_KHR_fragment_shader_barycentric", @@ -283,6 +285,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_physical_storage_buffer"; case Extension::KHR_vulkan_memory_model: return "SPV_KHR_vulkan_memory_model"; + case Extension::KHR_compute_shader_derivatives: + return "SPV_KHR_compute_shader_derivatives"; case Extension::NV_compute_shader_derivatives: return "SPV_NV_compute_shader_derivatives"; case Extension::KHR_fragment_shader_barycentric: @@ -370,6 +374,10 @@ bool FeatureManager::enabledByDefault(Extension ext) { // KHR_ray_tracing and NV_ray_tracing are mutually exclusive so enable only // KHR extension by default case Extension::NV_ray_tracing: + return false; + // KHR_compute_shader_derivatives and NV_compute_shader_derivatives are + // mutually exclusive so enable only KHR extension by default + case Extension::NV_compute_shader_derivatives: return false; // Enabling EXT_demote_to_helper_invocation changes the code generation // behavior for the 'discard' statement. Therefore we will only enable it if diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index e1124999ec..04d1a6d556 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -15049,6 +15049,10 @@ void SpirvEmitter::addDerivativeGroupExecutionMode() { // to 2D quad rules. Using derivative operations in any numthreads // configuration not matching either of these is invalid and will produce an // error. 
+ static_assert(spv::ExecutionMode::DerivativeGroupQuadsNV == + spv::ExecutionMode::DerivativeGroupQuadsKHR); + static_assert(spv::ExecutionMode::DerivativeGroupLinearNV == + spv::ExecutionMode::DerivativeGroupLinearKHR); spv::ExecutionMode em = spv::ExecutionMode::DerivativeGroupQuadsNV; if (numThreads[0] % 4 == 0 && numThreads[1] == 1 && numThreads[2] == 1) { em = spv::ExecutionMode::DerivativeGroupLinearNV; diff --git a/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl new file mode 100644 index 0000000000..9e2246e6a5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl @@ -0,0 +1,29 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s + +// CHECK: OpCapability ComputeDerivativeGroupQuadsKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupQuadsKHR + + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(2,2,1)] +void main(uint3 id : SV_GroupThreadID) +{ + // CHECK: OpDPdx %float %float_0_5 + o[0] = ddx(0.5); + // CHECK: OpDPdxCoarse %float %float_0_5 + o[1] = ddx_coarse(0.5); + // CHECK: OpDPdy %float %float_0_5 + o[2] = ddy(0.5); + // CHECK: OpDPdyCoarse %float %float_0_5 + o[3] = ddy_coarse(0.5); + // CHECK: OpDPdxFine %float %float_0_5 + o[4] = ddx_fine(0.5); + // CHECK: OpDPdyFine %float %float_0_5 + o[5] = ddy_fine(0.5); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl new file mode 100644 index 0000000000..23f52ad4b5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK + +// CHECK: OpCapability ComputeDerivativeGroupLinearKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupLinearKHR + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(16,1,1)] +void main(uint3 id : SV_GroupThreadID) +{ + //CHECK: [[t1:%[0-9]+]] = OpLoad %type_1d_image %t1 + //CHECK-NEXT: [[ss1:%[0-9]+]] = OpLoad %type_sampler %ss + //CHECK-NEXT: [[si1:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1]] [[ss1]] + //CHECK-NEXT: [[query1:%[0-9]+]] = OpImageQueryLod %v2float [[si1]] %float_0_5 + //CHECK-NEXT: {{%[0-9]+}} = OpCompositeExtract %float [[query1]] 0 + o[0] = t1.CalculateLevelOfDetail(ss, 0.5); +} From 31a2f581a9eb48e295c20df9be334981f8951b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Thu, 27 Mar 2025 18:31:34 +0100 Subject: [PATCH 51/88] [SPIR-V] Fix usage of indices in subfunctions (#7242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameters tagged with indices are linked to a builtin. Because their layout is different between HLSL and SPIR-V, there is a common mechanism to handle those 'stage I/O variables'. Usually, a local variable with the correct HLSL layout is created, and when required, the value is copied in and copied out in the entrypoint wrapper. 
Then, a function-scoped pointer is passed to sub-functions.

The issue is that `indices` marks an array which is also shared across invocations, meaning we cannot simply copy-in/copy-out: we are only allowed to write the indices touched by the shader. This required pushing the logic into the assignment-expression handling: when a value is assigned to such a builtin, the layout transformation is done and the builtin is written to.

The remaining issue was how to find the builtin from an assignment: the code assumed the ParmDecl of the entrypoint was the only way to access this variable, but nothing prevents the user from passing this index array to another function. The simple solution is to move this out of the generic map and add a new field which stores the SpirvVariable we created, allowing any HLSL function to access it as soon as the HLSLIndices attribute is found.

Fixes #7009

---------

Signed-off-by: Nathan Gauër
---
 tools/clang/lib/SPIRV/DeclResultIdMapper.cpp | 4 +-
 tools/clang/lib/SPIRV/DeclResultIdMapper.h | 24 +++++++
 tools/clang/lib/SPIRV/SpirvEmitter.cpp | 27 +++++---
 .../meshshading.nv.triangle.indices.out.hlsl | 65 +++++++++++++++++++
 4 files changed, 108 insertions(+), 12 deletions(-)
 create mode 100644 tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl

diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
index fd0fa8a3d0..0358873589 100644
--- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
+++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
@@ -860,7 +860,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl,
 QualType arrayType = astContext.getConstantArrayType(
 type, llvm::APInt(32, arraySize), clang::ArrayType::Normal, 0);
 
- stageVarInstructions[cast(decl)] =
+ msOutIndicesBuiltin =
 getBuiltinVar(builtinID, arrayType, decl->getLocation());
 } else {
 // For NV_mesh_shader, the built type is PrimitiveIndicesNV
@@ -871,7 +871,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl,
 astContext.UnsignedIntTy, llvm::APInt(32, arraySize),
 clang::ArrayType::Normal, 0);
 
- stageVarInstructions[cast(decl)] =
+ msOutIndicesBuiltin =
 getBuiltinVar(builtinID, arrayType, decl->getLocation());
 }
 
diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.h b/tools/clang/lib/SPIRV/DeclResultIdMapper.h
index 80723393ce..6ac17fde9d 100644
--- a/tools/clang/lib/SPIRV/DeclResultIdMapper.h
+++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.h
@@ -559,6 +559,11 @@ class DeclResultIdMapper {
 return value;
 }
 
+ SpirvVariable *getMSOutIndicesBuiltin() {
+ assert(msOutIndicesBuiltin && "Variable usage before decl parsing.");
+ return msOutIndicesBuiltin;
+ }
+
 /// Decorate with spirv intrinsic attributes with lamda function variable
 /// check
 void decorateWithIntrinsicAttrs(
@@ -1014,6 +1019,25 @@ class DeclResultIdMapper {
 /// creating that stage variable, so that we don't need to query them again
 /// for reading and writing.
 llvm::DenseMap stageVarInstructions;
+
+ /// Special case for the Indices builtin:
+ /// - this builtin has a different layout in HLSL & SPIR-V, meaning it
+ /// requires
+ /// the same kind of handling as classic stageVarInstructions:
+ /// -> load into a HLSL compatible tmp
+ /// -> write back into the SPIR-V compatible layout.
+ /// - but the builtin is shared across invocations (not only lanes).
+ /// -> we must only write/read from the indices touched by the shader.
+ /// - the variable can be passed to other functions as a out param + /// -> we cannot copy-in/copy-out because shared across invocations. + /// -> we cannot pass a simple pointer: layout differences between + /// HLSL/SPIR-V. + /// + /// All this means we must keep track of the builtin, and each assignment to + /// this will have to handle the layout differences. The easiest solution is + /// to keep this builtin global to the module if present. + SpirvVariable *msOutIndicesBuiltin = nullptr; + /// Vector of all defined resource variables. llvm::SmallVector resourceVars; /// Mapping from {RW|Append|Consume}StructuredBuffers to their diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 04d1a6d556..579af04ea6 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -8133,17 +8133,21 @@ void SpirvEmitter::assignToMSOutIndices( if (indices.size() > 1) { vecComponent = indices.back(); } - auto *var = declIdMapper.getStageVarInstruction(decl); - const auto *varTypeDecl = astContext.getAsConstantArrayType(decl->getType()); - QualType varType = varTypeDecl->getElementType(); + SpirvVariable *var = declIdMapper.getMSOutIndicesBuiltin(); + uint32_t numVertices = 1; - if (!isVectorType(varType, nullptr, &numVertices)) { - assert(isScalarType(varType)); - } - QualType valueType = value->getAstResultType(); uint32_t numValues = 1; - if (!isVectorType(valueType, nullptr, &numValues)) { - assert(isScalarType(valueType)); + { + const auto *varTypeDecl = + astContext.getAsConstantArrayType(decl->getType()); + QualType varType = varTypeDecl->getElementType(); + if (!isVectorType(varType, nullptr, &numVertices)) { + assert(isScalarType(varType)); + } + QualType valueType = value->getAstResultType(); + if (!isVectorType(valueType, nullptr, &numValues)) { + assert(isScalarType(valueType)); + } } const auto loc = decl->getLocation(); @@ -8190,7 +8194,10 @@ void SpirvEmitter::assignToMSOutIndices( assert(numValues == numVertices); if (extMesh) { // create accesschain for Primitive*IndicesEXT[vertIndex]. - auto *ptr = spvBuilder.createAccessChain(varType, var, vertIndex, loc); + const ConstantArrayType *CAT = + astContext.getAsConstantArrayType(var->getAstResultType()); + auto *ptr = spvBuilder.createAccessChain(CAT->getElementType(), var, + vertIndex, loc); // finally create store for Primitive*IndicesEXT[vertIndex] = value. 
spvBuilder.createStore(ptr, value, loc); } else { diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl new file mode 100644 index 0000000000..05d9d8fb1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl @@ -0,0 +1,65 @@ +// RUN: %dxc -T ms_6_5 -E outie -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ms_6_5 -E innie -fcgl %s -spirv | FileCheck %s + +// CHECK-DAG: [[v4_n05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_n0_5 %float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_0_n05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0 %float_n0_5 %float_0 %float_1 +// CHECK-DAG: [[v3_1_0_0:%[0-9]+]] = OpConstantComposite %v3float %float_1 %float_0 %float_0 +// CHECK-DAG: [[v3_0_1_0:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_1 %float_0 +// CHECK-DAG: [[v3_0_0_1:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_0 %float_1 +// CHECK-DAG: [[u3_0_1_2:%[0-9]+]] = OpConstantComposite %v3uint %uint_0 %uint_1 %uint_2 + +// CHECK-DAG: OpDecorate [[indices:%[0-9]+]] BuiltIn PrimitiveIndicesNV + +struct MeshOutput { + float4 position : SV_Position; + float3 color : COLOR0; +}; + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void innie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + SetMeshOutputCounts(3, 2); + + triangles[0] = uint3(0, 1, 2); +// CHECK: [[off:%[0-9]+]] = OpIMul %uint %uint_0 %uint_3 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[off]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 0 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_1 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 1 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_2 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 2 +// CHECK: OpStore [[ptr]] [[tmp]] + + verts[0].position = float4(-0.5, 0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_0 +// CHECK: OpStore [[ptr]] [[v4_n05_05_0_1]] + verts[0].color = float3(1.0, 0.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_0 +// CHECK: OpStore [[ptr]] [[v3_1_0_0]] + + verts[1].position = float4(0.5, 0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_1 +// CHECK: OpStore [[ptr]] [[v4_05_05_0_1]] + verts[1].color = float3(0.0, 1.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_1 +// CHECK: OpStore [[ptr]] [[v3_0_1_0]] + + verts[2].position = float4(0.0, -0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_2 +// CHECK: OpStore [[ptr]] [[v4_0_n05_0_1]] + verts[2].color = float3(0.0, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_2 +// CHECK: OpStore [[ptr]] [[v3_0_0_1]] + +} + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void outie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + innie(triangles, verts); +} From 0fa207a4cd537f6a47d0570993a5cc4e43482042 
Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:09:03 -0700 Subject: [PATCH 52/88] Update DXIL.rst (#7254) Minor grammar fixes --- docs/DXIL.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index c3baf4e454..a68e31d0a9 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -225,10 +225,10 @@ DXIL uses 32-bit pointers in its representation. Out-of-bounds behavior ---------------------- -Indexable thread-local accesses are done via LLVM pointer and have C-like OOB semantics. -Groupshared accesses are done via LLVM pointer too. The origin of a groupshared pointer must be a single TGSM allocation. -If a groupshared pointer uses in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. -For groupshared pointer from regular GEP, OOB will has same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. +Indexable thread-local accesses are done via LLVM pointers and have C-like OOB semantics. +Groupshared accesses are done via LLVM pointers too. The origin of a groupshared pointer must be a single TGSM allocation. +If a groupshared pointer uses an in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. +For a groupshared pointer from regular GEP, OOB will have the same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. Resource accesses keeps the same out-of-bounds behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. @@ -3294,9 +3294,9 @@ Modules and Linking =================== HLSL has linking capabilities to enable third-party libraries. The linking step happens before shader DXIL is given to the driver compilers. -Experimental library generation is added in DXIL1.1. A library could be created by compile with lib_6_1 profile. -A library is a dxil container like the compile result of other shader profiles. The difference is library will keep information for linking like resource link info and entry function signatures. -Library support is not part of DXIL spec. Only requirement is linked shader must be valid DXIL. +Experimental library generation is added in DXIL1.1. A library could be created by compiling with the lib_6_1 profile. +A library is a dxil container like the compile result of other shader profiles. The difference is a library will keep information for linking like resource link info and entry function signatures. +Library support is not part of the DXIL spec. The only requirement is that the linked shader must be valid DXIL. Additional Notes From b7b532b145b7d40a2b4e44104f60040a97f5a13b Mon Sep 17 00:00:00 2001 From: Cassandra Beckley Date: Thu, 27 Mar 2025 11:58:44 -0700 Subject: [PATCH 53/88] [SPIR-V] Update submodules (#7269) spirv-val has added a validation that enforces a minimum version of SPIR-V 1.3 when using VulkanMemoryModel, so I've updated the tests that use it to use the correct target environment. Needed for #7266. 
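
For illustration only (not part of this patch's diff; the flags are taken from the test changes below), the updated pattern looks roughly like:

  // RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv

Without -fspv-target-env=vulkan1.1 (or newer), the module targets a SPIR-V version below 1.3 and the stricter spirv-val check rejects the VulkanMemoryModel capability.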
--- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl | 2 +- .../test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 54a521dd13..0e71067798 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 54a521dd130ae1b2f38fef79b09515702d135bdd +Subproject commit 0e710677989b4326ac974fd80c5308191ed80965 diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index ada1771a9f..393d5c7df1 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit ada1771a9f7a125573aa94fe551fdc44b45769bd +Subproject commit 393d5c7df150532045c50affffea2df22e8231b0 diff --git a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl index a8578f7377..5815981057 100644 --- a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl +++ b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=VULKAN // When the GLSL450 memory model is used, there should be no memory operands on the loads and stores. // When the Vulkan memory model is used, there should be no decorations. There should be memory operands on the loads and stores instead. diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl index e9a1813f31..a0b2ab7207 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN RWTexture1D g_tTex1di1; RWTexture1D g_tTex1du1; From eb169591adbc1403f09fb769d5c8f98e929e6f62 Mon Sep 17 00:00:00 2001 From: raoanag <127366241+raoanag@users.noreply.github.com> Date: Thu, 27 Mar 2025 12:07:17 -0700 Subject: [PATCH 54/88] Update print statements to be compatible with Python 3 (#7268) Update the print statements to be compatible with Python 3 --- tools/clang/utils/check_cfc/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/utils/check_cfc/setup.py b/tools/clang/utils/check_cfc/setup.py index b5fc473639..7405513f0a 100644 --- a/tools/clang/utils/check_cfc/setup.py +++ b/tools/clang/utils/check_cfc/setup.py @@ -8,10 +8,10 @@ import platform import sys if platform.system() == 'Windows': - print "Could not find py2exe. Please install then run setup.py py2exe." + print("Could not find py2exe. Please install then run setup.py py2exe.") raise else: - print "setup.py only required on Windows." 
+ print("setup.py only required on Windows.") sys.exit(1) setup( From 5ff9cbc7cb83ab2d5f52255412f61ac3226c4a08 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 27 Mar 2025 16:40:51 -0700 Subject: [PATCH 55/88] [Sema] Add and test new Subobject Attribute (#7258) This PR adds and tests a new subobject attribute. It will be useful for checking if a given decl is a subobject decl. This functionality will be used in https://github.com/microsoft/DirectXShaderCompiler/pull/7239 We need an attribute in order to determine whether to check its initializer for availability attributes or not. Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7257 --- tools/clang/include/clang/Basic/Attr.td | 7 + tools/clang/lib/AST/HlslTypes.cpp | 64 ++------- tools/clang/lib/Sema/SemaHLSL.cpp | 39 +++-- .../test/SemaHLSL/subobjects-ast-dump.hlsl | 136 ++++++++++++++++++ 4 files changed, 181 insertions(+), 65 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 48193f7077..7a009aa7e1 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -1157,6 +1157,13 @@ def HLSLRayQueryObject : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLSubObject : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; + let Args = [UnsignedArgument<"SubObjKindUint">, UnsignedArgument<"HitGroupType">]; +} + // HLSL HitObject Attribute def HLSLHitObject : InheritableAttr { diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 8f9460ce63..eaf8273413 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -684,64 +684,20 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKind, DXIL::HitGroupType &hgType) { - hgType = (DXIL::HitGroupType)(-1); type = type.getCanonicalType(); if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - switch (name.size()) { - case 17: - return name == "StateObjectConfig" - ? (subobjectKind = DXIL::SubobjectKind::StateObjectConfig, - true) - : false; - case 18: - return name == "LocalRootSignature" - ? (subobjectKind = DXIL::SubobjectKind::LocalRootSignature, - true) - : false; - case 19: - return name == "GlobalRootSignature" - ? (subobjectKind = DXIL::SubobjectKind::GlobalRootSignature, - true) - : false; - case 29: - return name == "SubobjectToExportsAssociation" - ? (subobjectKind = - DXIL::SubobjectKind::SubobjectToExportsAssociation, - true) - : false; - case 22: - return name == "RaytracingShaderConfig" - ? (subobjectKind = DXIL::SubobjectKind::RaytracingShaderConfig, - true) - : false; - case 24: - return name == "RaytracingPipelineConfig" - ? (subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig, - true) - : false; - case 25: - return name == "RaytracingPipelineConfig1" - ? 
(subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig1, - true) - : false; - case 16: - if (name == "TriangleHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::Triangle; - return true; - } - return false; - case 27: - if (name == "ProceduralPrimitiveHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::ProceduralPrimitive; - return true; - } + RecordDecl *RD = RT->getDecl(); + if (!RD->hasAttr()) { return false; } + + HLSLSubObjectAttr *Attr = RD->getAttr(); + subobjectKind = static_cast(Attr->getSubObjKindUint()); + hgType = static_cast(Attr->getHitGroupType()); + if (subobjectKind == DXIL::SubobjectKind::HitGroup) + DXASSERT(DXIL::IsValidHitGroupType(hgType), "invalid hit group type"); + + return true; } return false; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 243471bc55..d20daa0ac0 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -2785,13 +2785,17 @@ AddBuiltInTriangleIntersectionAttributes(ASTContext &context, // // Subobjects -static CXXRecordDecl *StartSubobjectDecl(ASTContext &context, - const char *name) { +static CXXRecordDecl * +StartSubobjectDecl(ASTContext &context, const char *name, + DXIL::SubobjectKind Kind, + DXIL::HitGroupType HGT = DXIL::HitGroupType::LastEntry) { IdentifierInfo &id = context.Idents.get(StringRef(name), tok::TokenKind::identifier); CXXRecordDecl *decl = CXXRecordDecl::Create( context, TagTypeKind::TTK_Struct, context.getTranslationUnitDecl(), NoLoc, NoLoc, &id, nullptr, DelayTypeCreationTrue); + decl->addAttr(HLSLSubObjectAttr::CreateImplicit( + context, static_cast(Kind), static_cast(HGT))); decl->addAttr(FinalAttr::CreateImplicit(context, FinalAttr::Keyword_final)); decl->startDefinition(); return decl; @@ -2808,7 +2812,8 @@ void FinishSubobjectDecl(ASTContext &context, CXXRecordDecl *decl) { // uint32_t Flags; // }; static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "StateObjectConfig"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "StateObjectConfig", DXIL::SubobjectKind::StateObjectConfig); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2822,7 +2827,10 @@ static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, bool global) { CXXRecordDecl *decl = StartSubobjectDecl( - context, global ? "GlobalRootSignature" : "LocalRootSignature"); + context, global ? "GlobalRootSignature" : "LocalRootSignature", + global ? 
DXIL::SubobjectKind::GlobalRootSignature + : DXIL::SubobjectKind::LocalRootSignature); + CreateSimpleField(context, decl, "Data", context.HLSLStringTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2837,7 +2845,8 @@ static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, static CXXRecordDecl * CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "SubobjectToExportsAssociation"); + StartSubobjectDecl(context, "SubobjectToExportsAssociation", + DXIL::SubobjectKind::SubobjectToExportsAssociation); CreateSimpleField(context, decl, "Subobject", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Exports", context.HLSLStringTy, @@ -2853,7 +2862,9 @@ CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingShaderConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingShaderConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingShaderConfig", + DXIL::SubobjectKind::RaytracingShaderConfig); CreateSimpleField(context, decl, "MaxPayloadSizeInBytes", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "MaxAttributeSizeInBytes", @@ -2868,7 +2879,9 @@ CreateSubobjectRaytracingShaderConfig(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingPipelineConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingPipelineConfig", + DXIL::SubobjectKind::RaytracingPipelineConfig); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2883,7 +2896,8 @@ CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { static CXXRecordDecl * CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "RaytracingPipelineConfig1"); + StartSubobjectDecl(context, "RaytracingPipelineConfig1", + DXIL::SubobjectKind::RaytracingPipelineConfig1); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, @@ -2898,7 +2912,9 @@ CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { // string ClosestHit; // }; static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup", + DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::Triangle); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", context.HLSLStringTy, @@ -2915,8 +2931,9 @@ static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectProceduralPrimitiveHitGroup(ASTContext &context) { - CXXRecordDecl *decl = - StartSubobjectDecl(context, "ProceduralPrimitiveHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "ProceduralPrimitiveHitGroup", DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::ProceduralPrimitive); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", 
context.HLSLStringTy, diff --git a/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl new file mode 100644 index 0000000000..6133847fb8 --- /dev/null +++ b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl @@ -0,0 +1,136 @@ +// RUN: %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// The HLSL source is just a copy of +// tools\clang\test\HLSLFileCheck\shader_targets\raytracing\subobjects_raytracingPipelineConfig1.hlsl + +// This test tests that the HLSLSubObjectAttr attribute is present on all +// HLSL subobjects, and tests the ast representation of subobjects + +// ASTIMPL: CXXRecordDecl 0x{{.+}} <> implicit referenced struct StateObjectConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 0 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct GlobalRootSignature definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 1 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct LocalRootSignature definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 2 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct SubobjectToExportsAssociation definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 8 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Subobject 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Exports 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingShaderConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 9 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxPayloadSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxAttributeSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit struct RaytracingPipelineConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 10 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct TriangleHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 0 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct ProceduralPrimitiveHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 1 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Intersection 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingPipelineConfig1 definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} 
<> Implicit 12 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' + +// AST: VarDecl 0x{{.+}} grs 'GlobalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'GlobalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "CBV(b0)" +// AST-NEXT: VarDecl 0x{{.+}} soc 'StateObjectConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'StateObjectConfig' +// AST-NEXT: BinaryOperator 0x{{.+}} 'unsigned int' '|' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS' 'const unsigned int' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} lrs 'LocalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'LocalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)" +// AST-NEXT: VarDecl 0x{{.+}} sea 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a;b;foo;c" +// AST-NEXT: VarDecl 0x{{.+}} sea2 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} sea3 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "" +// AST-NEXT: VarDecl 0x{{.+}} rsc 'RaytracingShaderConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingShaderConfig' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 128 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 64 +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} sea4 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: 
ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "rpc" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} rpc2 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_NONE' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} trHitGt 'TriangleHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'TriangleHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: VarDecl 0x{{.+}} ppHitGt 'ProceduralPrimitiveHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'ProceduralPrimitiveHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "c" + +GlobalRootSignature grs = {"CBV(b0)"}; +StateObjectConfig soc = { STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS | STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS }; +LocalRootSignature lrs = {"UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)"}; +SubobjectToExportsAssociation sea = { "grs", "a;b;foo;c" }; +// Empty association is well-defined: it creates a default association +SubobjectToExportsAssociation sea2 = { "grs", ";" }; +SubobjectToExportsAssociation sea3 = { "grs", "" }; +RaytracingShaderConfig rsc = { 128, 64 }; +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES }; +SubobjectToExportsAssociation sea4 = {"rpc", ";"}; +RaytracingPipelineConfig1 rpc2 = {32, RAYTRACING_PIPELINE_FLAG_NONE }; +TriangleHitGroup trHitGt = {"a", "b"}; +ProceduralPrimitiveHitGroup ppHitGt = { "a", "b", "c"}; From 206b77577d15fc5798eb7ad52290388539b7146d Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 28 Mar 2025 15:37:35 -0700 Subject: [PATCH 56/88] [OMM] Add D3D Flag RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS, and run d3dreflect tests (#7239) This PR adds a new flag, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS, according to the spec. It can be used with `RaytracingPipelineConfig1` subobjects. We expect this new flag to be represented in the output. Additionally, d3dreflect tests are run to ensure that when a rayquery object is using the new OMM enablement flags, that the minimum shader model target is 6.9. 
Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7190 --- tools/clang/lib/AST/ASTContextHLSL.cpp | 4 +++ tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 32 +++++++++++++++---- .../d3dreflect/raytracingpipelineconfig1.hlsl | 19 +++++++++++ .../raytracingpipelineconfig1-warnings.hlsl | 6 ++++ .../tools/dxcompiler/dxcdisassembler.cpp | 2 ++ 5 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl create mode 100644 tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index dcd3e89e9a..3748f8f8f8 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -699,6 +699,10 @@ void hlsl::AddRaytracingConstants(ASTContext &context) { AddConstUInt( context, StringRef("RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"), (unsigned)DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives); + AddConstUInt(context, context.getTranslationUnitDecl(), + StringRef("RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"), + (unsigned)DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)); } /// Adds all constants and enums for sampler feedback diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index ed727af149..c562ee8d52 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -301,7 +301,8 @@ std::vector GetAllExportedFDecls(clang::Sema *self) { } void GatherGlobalsWithInitializers( - DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit) { + DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit, + llvm::SmallVectorImpl &SubObjects) { for (auto *D : DC->decls()) { // Skip built-ins and function decls. if (D->isImplicit() || isa(D)) @@ -310,11 +311,19 @@ void GatherGlobalsWithInitializers( // Add if user-defined static or groupshared global with initializer. if (VD->hasInit() && VD->hasGlobalStorage() && (VD->getStorageClass() == SC_Static || - VD->hasAttr())) + VD->hasAttr())) { + // Place subobjects in a separate collection. + if (const RecordType *RT = VD->getType()->getAs()) { + if (RT->getDecl()->hasAttr()) { + SubObjects.push_back(VD); + continue; + } + } GlobalsWithInit.push_back(VD); + } } else if (auto *DC = dyn_cast(D)) { // Recurse into DeclContexts like namespace, cbuffer, class/struct, etc. 
- GatherGlobalsWithInitializers(DC, GlobalsWithInit); + GatherGlobalsWithInitializers(DC, GlobalsWithInit, SubObjects); } } } @@ -592,14 +601,24 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { hlsl::ShaderModel::GetByName(self->getLangOpts().HLSLProfile.c_str()); llvm::SmallVector GlobalsWithInit; - GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), - GlobalsWithInit); - + llvm::SmallVector SubObjects; std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; llvm::SmallPtrSet DeclAvailabilityChecked; llvm::SmallSet DiagnosedTypeLocs; + GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), + GlobalsWithInit, SubObjects); + + if (shaderModel->GetKind() == DXIL::ShaderKind::Library) { + DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, shaderModel->GetKind(), NodeLaunchTy, nullptr, + DiagnosedCalls, DeclAvailabilityChecked, DiagnosedTypeLocs); + for (VarDecl *VD : SubObjects) + Visitor.TraverseDecl(VD); + } + // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; @@ -705,7 +724,6 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << PatchConstantFunctionReturnIdx; } } - DXIL::ShaderKind EntrySK = shaderModel->GetKind(); DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; if (EntrySK == DXIL::ShaderKind::Library) { diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl new file mode 100644 index 0000000000..44424f5d14 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s + + +// CHECK: ; RaytracingPipelineConfig1 rpc = { MaxTraceRecursionDepth = 32, Flags = RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +// AST: TranslationUnitDecl 0x{{.+}} <> +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' 'const unsigned int' +// ASTIMPL: VarDecl 0x{{.+}} <> implicit referenced RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// ASTIMPL-NEXT: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// ASTIMPL-NEXT: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl new file mode 100644 index 0000000000..c220f5734d --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl @@ -0,0 +1,6 @@ +// RUN: %dxc -T lib_6_8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_8; introduced in shader model 6.9}} 
+RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + + diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp index 01f4973fbe..3af305d52a 100644 --- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp @@ -671,6 +671,8 @@ static const char *FlagToString(DXIL::RaytracingPipelineFlags Flag) { return "RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES"; case DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives: return "RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"; + case DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps: + return "RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"; } return ""; } From 3035d316c35289b68e8fc9d8cf21d86a204fb0e2 Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 1 Apr 2025 12:10:06 -0500 Subject: [PATCH 57/88] Require CMake 3.17, remove CMP0051 (#7287) Hopefully this works and gets us able to build with CMake 4+. --- CMakeLists.txt | 9 --------- tools/clang/CMakeLists.txt | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 74244c1d58..0977fa1246 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,15 +17,6 @@ if(POLICY CMP0022) cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required endif() -if (POLICY CMP0051) - # CMake 3.1 and higher include generator expressions of the form - # $ in the SOURCES property. These need to be - # stripped everywhere that access the SOURCES property, so we just - # defer to the OLD behavior of not including generator expressions - # in the output for now. - cmake_policy(SET CMP0051 OLD) -endif() - if(CMAKE_VERSION VERSION_LESS 3.1.20141117) set(cmake_3_2_USES_TERMINAL) else() diff --git a/tools/clang/CMakeLists.txt b/tools/clang/CMakeLists.txt index 71190336ca..449e6c28b4 100644 --- a/tools/clang/CMakeLists.txt +++ b/tools/clang/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8) +cmake_minimum_required(VERSION 3.17.2) # HLSL Change - Require CMake 3.17.2. # FIXME: It may be removed when we use 2.8.12. if(CMAKE_VERSION VERSION_LESS 2.8.12) From 30bfd82296a04f8302c949d79387b06fc37a31c6 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 1 Apr 2025 11:56:18 -0700 Subject: [PATCH 58/88] NFC: Infrastructure changes for DXIL op vector and multi-dim overloads (#7259) This change adds vector and multi-dimensional overload support for DXIL operations. Multi-dimensional (or "extended") overloads are added, where two or more types in a DXIL Op function signature may vary independently, such as both the return type and a parameter type. Until now, only one overload dimension has been necessary. For single-dim overloads, any number of parameters in a DXIL op may refer to this single overload type. For multi-dim overloads, each type that can vary must have a unique overload dimension, even when two or more types must be the same. This follows a pattern from llvm intrinsics. If two or more of the types need to be the same, this constraint must be handled manually, outside the automatic overload constraints defined by the DXIL op definitions. Vector overloads are also added, requiring an additional set of scalar overload types to define the allowed vector element types, on top of the original set describing the allowed scalar overloads for an operation, since both scalar and vector overloads may be allowed on the same operation. 
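To make the "extended" overload representation concrete: as described in the updated `DxilOperations.h` comments further down, a multi-dimensional overload is carried around as an unnamed (literal) LLVM struct type wrapping one type per overload dimension, e.g. `type { i32, <2 x float> }`. The snippet below is a minimal standalone sketch of that idea using plain LLVM APIs (the LLVM 3.7-era signatures used by this codebase); it is not the DXC helper itself, and the names `Dim0`/`Dim1` are illustrative only.

```cpp
// Minimal sketch (not DXC code): build an unnamed "extended overload"
// struct type wrapping one type per overload dimension, e.g.
// { i32, <2 x float> }, using plain LLVM APIs.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;

  // One type per overload dimension: dimension 0 is a scalar i32,
  // dimension 1 is a 2-element float vector.
  // (VectorType::get(Type*, unsigned) is the LLVM 3.7-era API used by DXC.)
  llvm::Type *Dim0 = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *Dim1 = llvm::VectorType::get(llvm::Type::getFloatTy(Ctx), 2);

  // An unnamed (literal) struct keeps the dimensions together as a single
  // llvm::Type*, which lets multi-dim overloads flow through interfaces
  // that expect one overload type.
  llvm::Type *Members[] = {Dim0, Dim1};
  llvm::StructType *Extended = llvm::StructType::get(Ctx, Members);

  Extended->print(llvm::errs()); // prints: { i32, <2 x float> }
  llvm::errs() << "\n";
  return 0;
}
```

In the patch itself, `OP::GetExtendedOverloadType` builds this wrapper from the per-dimension types, and `GetTypeSlot` classifies such an unnamed struct as `TS_Extended`.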
There are several components involved in handling DXIL operation overloads, with some changes: - DXIL Op definitions in `hctdb.py` use a string of characters to define the allowed overloads, and special type names used in parameter definitions that refer to the overload type. - Overload string syntax updated and more heavily validated. - `','` may separate dimensions for multi-dim overloads - `'<'` indicates that a vector overload is allowed, in which case, scalar components on the left indicate normal scalar overloads allowed, and scalar components on the right indicate the allowed vector element overloads. - If scalar overloads are present to the left, and omitted to the right, the scalar components are replicated to the right automatically. For instance: `"hf<"` is equivalent to `"hf<hf"`. This makes it compatible with all these existing mechanisms without requiring an API overhaul impacting the broader code base. `GetExtendedOverloadType` is used to construct this type from multiple types. While updating `DxilOperations.h|cpp`, I noticed and removed some unused methods: `IsDxilOpTypeName`, `IsDxilOpType`, `IsDupDxilOpType`, `GetOriginalDxilOpType`. --------- Co-authored-by: Greg Roth --- include/dxc/DXIL/DxilConstants.h | 5 + include/dxc/DXIL/DxilOperations.h | 103 +- lib/DXIL/DxilOperations.cpp | 5884 ++++++++++++------------- lib/DxilValidation/DxilValidation.cpp | 17 +- utils/hct/hctdb.py | 170 +- utils/hct/hctdb_instrhelp.py | 142 +- 6 files changed, 3162 insertions(+), 3159 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 0a9c6a4ffd..447728300b 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -155,6 +155,11 @@ const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +/* hctdb_instrhelp.get_max_oload_dims()*/ +// OLOAD_DIMS-TEXT:BEGIN +const unsigned kDxilMaxOloadDims = 2; +// OLOAD_DIMS-TEXT:END + enum class ComponentType : uint32_t { Invalid = 0, I1, diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index e522e06204..05021ce789 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -57,12 +57,31 @@ class OP { // caches. void RefreshCache(); + + // The single llvm::Type * "OverloadType" has one of these forms: + // No overloads (NumOverloadDims == 0): + // - TS_Void: VoidTy + // For single overload dimension (NumOverloadDims == 1): + // - TS_F*, TS_I*: a scalar numeric type (half, float, i1, i64, etc.), + // - TS_UDT: a pointer to a StructType representing a User Defined Type, + // - TS_Object: a named StructType representing a built-in object, or + // - TS_Vector: a vector type (<4 x float>, <16 x i16>, etc.) + // For multiple overload dimensions (TS_Extended, NumOverloadDims > 1): + // - an unnamed StructType containing each type for the corresponding + // dimension, such as: type { i32, <2 x float> } + // - contained type options are the same as for single dimension. 
+ llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType); + + // N-dimension convenience version of GetOpFunc: + llvm::Function *GetOpFunc(OpCode OpCode, + llvm::ArrayRef OverloadTypes); + const llvm::SmallMapVector & GetOpFuncList(OpCode OpCode) const; bool IsDxilOpUsed(OpCode opcode) const; void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } + llvm::Module *GetModule() { return m_pModule; } llvm::Type *GetHandleType() const; llvm::Type *GetHitObjectType() const; llvm::Type *GetNodeHandleType() const; @@ -81,9 +100,14 @@ class OP { llvm::Type *GetResRetType(llvm::Type *pOverloadType); llvm::Type *GetCBufferRetType(llvm::Type *pOverloadType); - llvm::Type *GetVectorType(unsigned numElements, llvm::Type *pOverloadType); + llvm::Type *GetStructVectorType(unsigned numElements, + llvm::Type *pOverloadType); bool IsResRetType(llvm::Type *Ty); + // Construct an unnamed struct type containing the set of member types. + llvm::StructType * + GetExtendedOverloadType(llvm::ArrayRef OverloadTypes); + // Try to get the opcode class for a function. // Return true and set `opClass` if the given function is a dxil function. // Return false if the given function is not a dxil function. @@ -128,11 +152,6 @@ class OP { static bool BarrierRequiresGroup(const llvm::CallInst *CI); static bool BarrierRequiresNode(const llvm::CallInst *CI); static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI); - static bool IsDxilOpTypeName(llvm::StringRef name); - static bool IsDxilOpType(llvm::StructType *ST); - static bool IsDupDxilOpType(llvm::StructType *ST); - static llvm::StructType *GetOriginalDxilOpType(llvm::StructType *ST, - llvm::Module &M); static void GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, unsigned &major, unsigned &minor, unsigned &mask); @@ -141,6 +160,13 @@ class OP { unsigned valMinor, unsigned &major, unsigned &minor, unsigned &mask); + static bool IsDxilOpExtendedOverload(OpCode C); + + // Return true if the overload name suffix for this operation may be + // constructed based on a user-defined or user-influenced type name + // that may not represent the same type in different linked modules. + static bool MayHaveNonCanonicalOverload(OpCode OC); + private: // Per-module properties. llvm::LLVMContext &m_Ctx; @@ -164,13 +190,33 @@ class OP { DXIL::LowPrecisionMode m_LowPrecisionMode; - static const unsigned kUserDefineTypeSlot = 9; - static const unsigned kObjectTypeSlot = 10; - static const unsigned kNumTypeOverloads = - 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj + // Overload types are split into "basic" overload types and special types + // Basic: void, half, float, double, i1, i8, i16, i32, i64 + // - These have one canonical overload per TypeSlot + // Special: udt, obj, vec, extended + // - These may have many overloads per type slot + enum TypeSlot : unsigned { + TS_F16 = 0, + TS_F32 = 1, + TS_F64 = 2, + TS_I1 = 3, + TS_I8 = 4, + TS_I16 = 5, + TS_I32 = 6, + TS_I64 = 7, + TS_BasicCount, + TS_UDT = 8, // Ex: %"struct.MyStruct" * + TS_Object = 9, // Ex: %"class.StructuredBuffer" + TS_Vector = 10, // Ex: <8 x i16> + TS_MaskBitCount, // Types used in Mask end here + // TS_Extended is only used to identify the unnamed struct type used to wrap + // multiple overloads when using GetTypeSlot. 
+ TS_Extended, // Ex: type { float, <16 x i32> } + TS_Invalid = UINT_MAX, + }; - llvm::Type *m_pResRetType[kNumTypeOverloads]; - llvm::Type *m_pCBufferRetType[kNumTypeOverloads]; + llvm::Type *m_pResRetType[TS_BasicCount]; + llvm::Type *m_pCBufferRetType[TS_BasicCount]; struct OpCodeCacheItem { llvm::SmallMapVector pOverloads; @@ -181,27 +227,46 @@ class OP { private: // Static properties. + struct OverloadMask { + // mask of type slot bits as (1 << TypeSlot) + uint16_t SlotMask; + static_assert(TS_MaskBitCount <= (sizeof(SlotMask) * 8)); + bool operator[](unsigned TypeSlot) const { + return (TypeSlot < TS_MaskBitCount) ? (bool)(SlotMask & (1 << TypeSlot)) + : 0; + } + operator bool() const { return SlotMask != 0; } + }; struct OpCodeProperty { OpCode opCode; const char *pOpCodeName; OpCodeClass opCodeClass; const char *pOpCodeClassName; - bool bAllowOverload[kNumTypeOverloads]; // void, h,f,d, i1, i8,i16,i32,i64, - // udt llvm::Attribute::AttrKind FuncAttr; + + // Number of overload dimensions used by the operation. + unsigned int NumOverloadDims; + + // Mask of supported overload types for each overload dimension. + OverloadMask AllowedOverloads[DXIL::kDxilMaxOloadDims]; + + // Mask of scalar components allowed for each demension where + // AllowedOverloads[n][TS_Vector] is true. + OverloadMask AllowedVectorElements[DXIL::kDxilMaxOloadDims]; }; static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes]; - static const char *m_OverloadTypeName[kNumTypeOverloads]; + static const char *m_OverloadTypeName[TS_BasicCount]; static const char *m_NamePrefix; static const char *m_TypePrefix; static const char *m_MatrixTypePrefix; static unsigned GetTypeSlot(llvm::Type *pType); static const char *GetOverloadTypeName(unsigned TypeSlot); - static llvm::StringRef GetTypeName(llvm::Type *Ty, std::string &str); - static llvm::StringRef ConstructOverloadName(llvm::Type *Ty, - DXIL::OpCode opCode, - std::string &funcNameStorage); + static llvm::StringRef GetTypeName(llvm::Type *Ty, + llvm::SmallVectorImpl &Storage); + static llvm::StringRef + ConstructOverloadName(llvm::Type *Ty, DXIL::OpCode opCode, + llvm::SmallVectorImpl &Storage); }; } // namespace hlsl diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 86049fee9c..56cdd0d04f 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -23,8 +23,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -using std::string; -using std::vector; namespace hlsl { @@ -41,2989 +39,2605 @@ import hctdb_instrhelp /* hctdb_instrhelp.get_oloads_props()*/ // OPCODE-OLOADS:BEGIN const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { - // OpCode OpCode name, OpCodeClass - // OpCodeClass name, void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj, function attribute - // Temporary, indexable, input, output registers void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::TempRegLoad, - "TempRegLoad", - OCC::TempRegLoad, - "tempRegLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TempRegStore, - "TempRegStore", - OCC::TempRegStore, - "tempRegStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::MinPrecXRegLoad, - "MinPrecXRegLoad", - OCC::MinPrecXRegLoad, - "minPrecXRegLoad", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::MinPrecXRegStore, - 
"MinPrecXRegStore", - OCC::MinPrecXRegStore, - "minPrecXRegStore", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::None, - }, - { - OC::LoadInput, - "LoadInput", - OCC::LoadInput, - "loadInput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StoreOutput, - "StoreOutput", - OCC::StoreOutput, - "storeOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Unary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FAbs, - "FAbs", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Saturate, - "Saturate", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNaN, - "IsNaN", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsInf, - "IsInf", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsFinite, - "IsFinite", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNormal, - "IsNormal", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Cos, - "Cos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sin, - "Sin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Tan, - "Tan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Acos, - "Acos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Asin, - "Asin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Atan, - "Atan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hcos, - "Hcos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hsin, - "Hsin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Htan, - "Htan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Exp, - "Exp", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Frc, - "Frc", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Log, - "Log", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - 
false}, - Attribute::ReadNone, - }, - { - OC::Sqrt, - "Sqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Rsqrt, - "Rsqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary float - rounding void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Round_ne, - "Round_ne", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_ni, - "Round_ni", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_pi, - "Round_pi", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_z, - "Round_z", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfrev, - "Bfrev", - OCC::Unary, - "unary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Countbits, - "Countbits", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::FirstbitLo, - "FirstbitLo", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitHi, - "FirstbitHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitSHi, - "FirstbitSHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FMax, - "FMax", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FMin, - "FMin", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMax, - "IMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::IMin, - "IMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMax, - "UMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::UMin, - "UMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary int with two 
outputs void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IMul, - "IMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with two outputs void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UMul, - "UMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::UDiv, - "UDiv", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with carry or borrow void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UAddc, - "UAddc", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::USubb, - "USubb", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary float void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::FMad, - "FMad", - OCC::Tertiary, - "tertiary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Fma, - "Fma", - OCC::Tertiary, - "tertiary", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMad, - "IMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMad, - "UMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Msad, - "Msad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Ibfe, - "Ibfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Ubfe, - "Ubfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Quaternary void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfi, - "Bfi", - OCC::Quaternary, - "quaternary", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::Dot2, - "Dot2", - OCC::Dot2, - "dot2", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot3, - "Dot3", - OCC::Dot3, - "dot3", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - 
OC::Dot4, - "Dot4", - OCC::Dot4, - "dot4", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CreateHandle, - "CreateHandle", - OCC::CreateHandle, - "createHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoad, - "CBufferLoad", - OCC::CBufferLoad, - "cbufferLoad", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoadLegacy, - "CBufferLoadLegacy", - OCC::CBufferLoadLegacy, - "cbufferLoadLegacy", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Sample, - "Sample", - OCC::Sample, - "sample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleBias, - "SampleBias", - OCC::SampleBias, - "sampleBias", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleLevel, - "SampleLevel", - OCC::SampleLevel, - "sampleLevel", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleGrad, - "SampleGrad", - OCC::SampleGrad, - "sampleGrad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmp, - "SampleCmp", - OCC::SampleCmp, - "sampleCmp", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpLevelZero, - "SampleCmpLevelZero", - OCC::SampleCmpLevelZero, - "sampleCmpLevelZero", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureLoad, - "TextureLoad", - OCC::TextureLoad, - "textureLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureStore, - "TextureStore", - OCC::TextureStore, - "textureStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferLoad, - "BufferLoad", - OCC::BufferLoad, - "bufferLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::BufferStore, - "BufferStore", - OCC::BufferStore, - "bufferStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferUpdateCounter, - "BufferUpdateCounter", - OCC::BufferUpdateCounter, - "bufferUpdateCounter", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CheckAccessFullyMapped, - "CheckAccessFullyMapped", - OCC::CheckAccessFullyMapped, - "checkAccessFullyMapped", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetDimensions, - "GetDimensions", - OCC::GetDimensions, - "getDimensions", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , 
function attribute - { - OC::TextureGather, - "TextureGather", - OCC::TextureGather, - "textureGather", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureGatherCmp, - "TextureGatherCmp", - OCC::TextureGatherCmp, - "textureGatherCmp", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Texture2DMSGetSamplePosition, - "Texture2DMSGetSamplePosition", - OCC::Texture2DMSGetSamplePosition, - "texture2DMSGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSamplePosition, - "RenderTargetGetSamplePosition", - OCC::RenderTargetGetSamplePosition, - "renderTargetGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSampleCount, - "RenderTargetGetSampleCount", - OCC::RenderTargetGetSampleCount, - "renderTargetGetSampleCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AtomicBinOp, - "AtomicBinOp", - OCC::AtomicBinOp, - "atomicBinOp", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::AtomicCompareExchange, - "AtomicCompareExchange", - OCC::AtomicCompareExchange, - "atomicCompareExchange", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::Barrier, - "Barrier", - OCC::Barrier, - "barrier", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CalculateLOD, - "CalculateLOD", - OCC::CalculateLOD, - "calculateLOD", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Discard, - "Discard", - OCC::Discard, - "discard", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DerivCoarseX, - "DerivCoarseX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivCoarseY, - "DerivCoarseY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineX, - "DerivFineX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineY, - "DerivFineY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::EvalSnapped, - "EvalSnapped", - OCC::EvalSnapped, - "evalSnapped", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, 
- }, - { - OC::EvalSampleIndex, - "EvalSampleIndex", - OCC::EvalSampleIndex, - "evalSampleIndex", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalCentroid, - "EvalCentroid", - OCC::EvalCentroid, - "evalCentroid", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SampleIndex, - "SampleIndex", - OCC::SampleIndex, - "sampleIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Coverage, - "Coverage", - OCC::Coverage, - "coverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InnerCoverage, - "InnerCoverage", - OCC::InnerCoverage, - "innerCoverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Compute/Mesh/Amplification/Node shader void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::ThreadId, - "ThreadId", - OCC::ThreadId, - "threadId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::GroupId, - "GroupId", - OCC::GroupId, - "groupId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ThreadIdInGroup, - "ThreadIdInGroup", - OCC::ThreadIdInGroup, - "threadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FlattenedThreadIdInGroup, - "FlattenedThreadIdInGroup", - OCC::FlattenedThreadIdInGroup, - "flattenedThreadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Geometry shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::EmitStream, - "EmitStream", - OCC::EmitStream, - "emitStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CutStream, - "CutStream", - OCC::CutStream, - "cutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitThenCutStream, - "EmitThenCutStream", - OCC::EmitThenCutStream, - "emitThenCutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GSInstanceID, - "GSInstanceID", - OCC::GSInstanceID, - "gsInstanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::MakeDouble, - "MakeDouble", - OCC::MakeDouble, - "makeDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SplitDouble, - "SplitDouble", - OCC::SplitDouble, - "splitDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain and hull shader void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LoadOutputControlPoint, - "LoadOutputControlPoint", - OCC::LoadOutputControlPoint, - "loadOutputControlPoint", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LoadPatchConstant, - 
"LoadPatchConstant", - OCC::LoadPatchConstant, - "loadPatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DomainLocation, - "DomainLocation", - OCC::DomainLocation, - "domainLocation", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::StorePatchConstant, - "StorePatchConstant", - OCC::StorePatchConstant, - "storePatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::OutputControlPointID, - "OutputControlPointID", - OCC::OutputControlPointID, - "outputControlPointID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull, Domain and Geometry shaders void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveID, - "PrimitiveID", - OCC::PrimitiveID, - "primitiveID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Other void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::CycleCounterLegacy, - "CycleCounterLegacy", - OCC::CycleCounterLegacy, - "cycleCounterLegacy", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveIsFirstLane, - "WaveIsFirstLane", - OCC::WaveIsFirstLane, - "waveIsFirstLane", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveGetLaneIndex, - "WaveGetLaneIndex", - OCC::WaveGetLaneIndex, - "waveGetLaneIndex", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::WaveGetLaneCount, - "WaveGetLaneCount", - OCC::WaveGetLaneCount, - "waveGetLaneCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WaveAnyTrue, - "WaveAnyTrue", - OCC::WaveAnyTrue, - "waveAnyTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveAllTrue, - "WaveAllTrue", - OCC::WaveAllTrue, - "waveAllTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveActiveAllEqual, - "WaveActiveAllEqual", - OCC::WaveActiveAllEqual, - "waveActiveAllEqual", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBallot, - "WaveActiveBallot", - OCC::WaveActiveBallot, - "waveActiveBallot", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveReadLaneAt, - "WaveReadLaneAt", - OCC::WaveReadLaneAt, - "waveReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveReadLaneFirst, - "WaveReadLaneFirst", - OCC::WaveReadLaneFirst, - "waveReadLaneFirst", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveOp, - "WaveActiveOp", - OCC::WaveActiveOp, - "waveActiveOp", - {false, true, true, true, 
true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBit, - "WaveActiveBit", - OCC::WaveActiveBit, - "waveActiveBit", - {false, false, false, false, false, true, true, true, true, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixOp, - "WavePrefixOp", - OCC::WavePrefixOp, - "wavePrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadReadLaneAt, - "QuadReadLaneAt", - OCC::QuadReadLaneAt, - "quadReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::QuadOp, - "QuadOp", - OCC::QuadOp, - "quadOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Bitcasts with different sizes void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::BitcastI16toF16, - "BitcastI16toF16", - OCC::BitcastI16toF16, - "bitcastI16toF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF16toI16, - "BitcastF16toI16", - OCC::BitcastF16toI16, - "bitcastF16toI16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI32toF32, - "BitcastI32toF32", - OCC::BitcastI32toF32, - "bitcastI32toF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF32toI32, - "BitcastF32toI32", - OCC::BitcastF32toI32, - "bitcastF32toI32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI64toF64, - "BitcastI64toF64", - OCC::BitcastI64toF64, - "bitcastI64toF64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF64toI64, - "BitcastF64toI64", - OCC::BitcastF64toI64, - "bitcastF64toI64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Legacy floating-point void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LegacyF32ToF16, - "LegacyF32ToF16", - OCC::LegacyF32ToF16, - "legacyF32ToF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyF16ToF32, - "LegacyF16ToF32", - OCC::LegacyF16ToF32, - "legacyF16ToF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::LegacyDoubleToFloat, - "LegacyDoubleToFloat", - OCC::LegacyDoubleToFloat, - "legacyDoubleToFloat", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToSInt32, - "LegacyDoubleToSInt32", - OCC::LegacyDoubleToSInt32, - "legacyDoubleToSInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToUInt32, - "LegacyDoubleToUInt32", - OCC::LegacyDoubleToUInt32, - "legacyDoubleToUInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - 
OC::WaveAllBitCount, - "WaveAllBitCount", - OCC::WaveAllOp, - "waveAllOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixBitCount, - "WavePrefixBitCount", - OCC::WavePrefixOp, - "wavePrefixOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::AttributeAtVertex, - "AttributeAtVertex", - OCC::AttributeAtVertex, - "attributeAtVertex", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Graphics shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ViewID, - "ViewID", - OCC::ViewID, - "viewID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::RawBufferLoad, - "RawBufferLoad", - OCC::RawBufferLoad, - "rawBufferLoad", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::RawBufferStore, - "RawBufferStore", - OCC::RawBufferStore, - "rawBufferStore", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::None, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::InstanceID, - "InstanceID", - OCC::InstanceID, - "instanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InstanceIndex, - "InstanceIndex", - OCC::InstanceIndex, - "instanceIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing hit uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::HitKind, - "HitKind", - OCC::HitKind, - "hitKind", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::RayFlags, - "RayFlags", - OCC::RayFlags, - "rayFlags", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Dispatch Arguments void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::DispatchRaysIndex, - "DispatchRaysIndex", - OCC::DispatchRaysIndex, - "dispatchRaysIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DispatchRaysDimensions, - "DispatchRaysDimensions", - OCC::DispatchRaysDimensions, - "dispatchRaysDimensions", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Vectors void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::WorldRayOrigin, - "WorldRayOrigin", - OCC::WorldRayOrigin, - "worldRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldRayDirection, - "WorldRayDirection", - OCC::WorldRayDirection, - "worldRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray object space 
Vectors void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::ObjectRayOrigin, - "ObjectRayOrigin", - OCC::ObjectRayOrigin, - "objectRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ObjectRayDirection, - "ObjectRayDirection", - OCC::ObjectRayDirection, - "objectRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Transforms void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ObjectToWorld, - "ObjectToWorld", - OCC::ObjectToWorld, - "objectToWorld", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldToObject, - "WorldToObject", - OCC::WorldToObject, - "worldToObject", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // RayT void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::RayTMin, - "RayTMin", - OCC::RayTMin, - "rayTMin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::RayTCurrent, - "RayTCurrent", - OCC::RayTCurrent, - "rayTCurrent", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // AnyHit Terminals void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::IgnoreHit, - "IgnoreHit", - OCC::IgnoreHit, - "ignoreHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - { - OC::AcceptHitAndEndSearch, - "AcceptHitAndEndSearch", - OCC::AcceptHitAndEndSearch, - "acceptHitAndEndSearch", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - - // Indirect Shader Invocation void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::TraceRay, - "TraceRay", - OCC::TraceRay, - "traceRay", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::ReportHit, - "ReportHit", - OCC::ReportHit, - "reportHit", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::CallShader, - "CallShader", - OCC::CallShader, - "callShader", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Library create handle from resource struct (like HL intrinsic) void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::CreateHandleForLib, - "CreateHandleForLib", - OCC::CreateHandleForLib, - "createHandleForLib", - {false, false, false, false, false, false, false, false, false, false, - true}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveIndex, - "PrimitiveIndex", - OCC::PrimitiveIndex, - "primitiveIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot product with accumulate void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Dot2AddHalf, - "Dot2AddHalf", - OCC::Dot2AddHalf, - "dot2AddHalf", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - 
OC::Dot4AddI8Packed, - "Dot4AddI8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddU8Packed, - "Dot4AddU8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveMatch, - "WaveMatch", - OCC::WaveMatch, - "waveMatch", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixOp, - "WaveMultiPrefixOp", - OCC::WaveMultiPrefixOp, - "waveMultiPrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixBitCount, - "WaveMultiPrefixBitCount", - OCC::WaveMultiPrefixBitCount, - "waveMultiPrefixBitCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Mesh shader instructions void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::SetMeshOutputCounts, - "SetMeshOutputCounts", - OCC::SetMeshOutputCounts, - "setMeshOutputCounts", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitIndices, - "EmitIndices", - OCC::EmitIndices, - "emitIndices", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetMeshPayload, - "GetMeshPayload", - OCC::GetMeshPayload, - "getMeshPayload", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadOnly, - }, - { - OC::StoreVertexOutput, - "StoreVertexOutput", - OCC::StoreVertexOutput, - "storeVertexOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::StorePrimitiveOutput, - "StorePrimitiveOutput", - OCC::StorePrimitiveOutput, - "storePrimitiveOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Amplification shader instructions void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::DispatchMesh, - "DispatchMesh", - OCC::DispatchMesh, - "dispatchMesh", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Sampler Feedback void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::WriteSamplerFeedback, - "WriteSamplerFeedback", - OCC::WriteSamplerFeedback, - "writeSamplerFeedback", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackBias, - "WriteSamplerFeedbackBias", - OCC::WriteSamplerFeedbackBias, - "writeSamplerFeedbackBias", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackLevel, - "WriteSamplerFeedbackLevel", - OCC::WriteSamplerFeedbackLevel, - "writeSamplerFeedbackLevel", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackGrad, - "WriteSamplerFeedbackGrad", - OCC::WriteSamplerFeedbackGrad, - "writeSamplerFeedbackGrad", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Inline Ray Query void, h, 
f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery, - "AllocateRayQuery", - OCC::AllocateRayQuery, - "allocateRayQuery", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_TraceRayInline, - "RayQuery_TraceRayInline", - OCC::RayQuery_TraceRayInline, - "rayQuery_TraceRayInline", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Proceed, - "RayQuery_Proceed", - OCC::RayQuery_Proceed, - "rayQuery_Proceed", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Abort, - "RayQuery_Abort", - OCC::RayQuery_Abort, - "rayQuery_Abort", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitNonOpaqueTriangleHit, - "RayQuery_CommitNonOpaqueTriangleHit", - OCC::RayQuery_CommitNonOpaqueTriangleHit, - "rayQuery_CommitNonOpaqueTriangleHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitProceduralPrimitiveHit, - "RayQuery_CommitProceduralPrimitiveHit", - OCC::RayQuery_CommitProceduralPrimitiveHit, - "rayQuery_CommitProceduralPrimitiveHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommittedStatus, - "RayQuery_CommittedStatus", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateType, - "RayQuery_CandidateType", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectToWorld3x4, - "RayQuery_CandidateObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateWorldToObject3x4, - "RayQuery_CandidateWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectToWorld3x4, - "RayQuery_CommittedObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedWorldToObject3x4, - "RayQuery_CommittedWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, - "RayQuery_CandidateProceduralPrimitiveNonOpaque", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleFrontFace, - "RayQuery_CandidateTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleFrontFace, - "RayQuery_CommittedTriangleFrontFace", - 
OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleBarycentrics, - "RayQuery_CandidateTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleBarycentrics, - "RayQuery_CommittedTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayFlags, - "RayQuery_RayFlags", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayOrigin, - "RayQuery_WorldRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayDirection, - "RayQuery_WorldRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayTMin, - "RayQuery_RayTMin", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleRayT, - "RayQuery_CandidateTriangleRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedRayT, - "RayQuery_CommittedRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceIndex, - "RayQuery_CandidateInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceID, - "RayQuery_CandidateInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateGeometryIndex, - "RayQuery_CandidateGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidatePrimitiveIndex, - "RayQuery_CandidatePrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayOrigin, - "RayQuery_CandidateObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayDirection, - "RayQuery_CandidateObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - 
OC::RayQuery_CommittedInstanceIndex, - "RayQuery_CommittedInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceID, - "RayQuery_CommittedInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedGeometryIndex, - "RayQuery_CommittedGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedPrimitiveIndex, - "RayQuery_CommittedPrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayOrigin, - "RayQuery_CommittedObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayDirection, - "RayQuery_CommittedObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values, raytracing tier 1.1 void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::GeometryIndex, - "GeometryIndex", - OCC::GeometryIndex, - "geometryIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, - "RayQuery_CandidateInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, - "RayQuery_CommittedInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Get handle from heap void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AnnotateHandle, - "AnnotateHandle", - OCC::AnnotateHandle, - "annotateHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromBinding, - "CreateHandleFromBinding", - OCC::CreateHandleFromBinding, - "createHandleFromBinding", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromHeap, - "CreateHandleFromHeap", - OCC::CreateHandleFromHeap, - "createHandleFromHeap", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unpacking intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Unpack4x8, - "Unpack4x8", - OCC::Unpack4x8, - "unpack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Packing 
intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Pack4x8, - "Pack4x8", - OCC::Pack4x8, - "pack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Helper Lanes void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IsHelperLane, - "IsHelperLane", - OCC::IsHelperLane, - "isHelperLane", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadVote, - "QuadVote", - OCC::QuadVote, - "quadVote", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGatherRaw, - "TextureGatherRaw", - OCC::TextureGatherRaw, - "textureGatherRaw", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpLevel, - "SampleCmpLevel", - OCC::SampleCmpLevel, - "sampleCmpLevel", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureStoreSample, - "TextureStoreSample", - OCC::TextureStoreSample, - "textureStoreSample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::Reserved0, - "Reserved0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved1, - "Reserved1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved2, - "Reserved2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved3, - "Reserved3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved4, - "Reserved4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved5, - "Reserved5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved6, - "Reserved6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved7, - "Reserved7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved8, - "Reserved8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved9, - "Reserved9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved10, - "Reserved10", - 
OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved11, - "Reserved11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::AllocateNodeOutputRecords, - "AllocateNodeOutputRecords", - OCC::AllocateNodeOutputRecords, - "allocateNodeOutputRecords", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Get Pointer to Node Record in Address Space 6 void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::GetNodeRecordPtr, - "GetNodeRecordPtr", - OCC::GetNodeRecordPtr, - "getNodeRecordPtr", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IncrementOutputCount, - "IncrementOutputCount", - OCC::IncrementOutputCount, - "incrementOutputCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::OutputComplete, - "OutputComplete", - OCC::OutputComplete, - "outputComplete", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetInputRecordCount, - "GetInputRecordCount", - OCC::GetInputRecordCount, - "getInputRecordCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::FinishedCrossGroupSharing, - "FinishedCrossGroupSharing", - OCC::FinishedCrossGroupSharing, - "finishedCrossGroupSharing", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::BarrierByMemoryType, - "BarrierByMemoryType", - OCC::BarrierByMemoryType, - "barrierByMemoryType", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByMemoryHandle, - "BarrierByMemoryHandle", - OCC::BarrierByMemoryHandle, - "barrierByMemoryHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByNodeRecordHandle, - "BarrierByNodeRecordHandle", - OCC::BarrierByNodeRecordHandle, - "barrierByNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::CreateNodeOutputHandle, - "CreateNodeOutputHandle", - OCC::createNodeOutputHandle, - "createNodeOutputHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IndexNodeHandle, - "IndexNodeHandle", - OCC::IndexNodeHandle, - "indexNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeHandle, - "AnnotateNodeHandle", - OCC::AnnotateNodeHandle, - "annotateNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - 
OC::CreateNodeInputRecordHandle, - "CreateNodeInputRecordHandle", - OCC::CreateNodeInputRecordHandle, - "createNodeInputRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeRecordHandle, - "AnnotateNodeRecordHandle", - OCC::AnnotateNodeRecordHandle, - "annotateNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::NodeOutputIsValid, - "NodeOutputIsValid", - OCC::NodeOutputIsValid, - "nodeOutputIsValid", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetRemainingRecursionLevels, - "GetRemainingRecursionLevels", - OCC::GetRemainingRecursionLevels, - "getRemainingRecursionLevels", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Comparison Samples void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpGrad, - "SampleCmpGrad", - OCC::SampleCmpGrad, - "sampleCmpGrad", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpBias, - "SampleCmpBias", - OCC::SampleCmpBias, - "sampleCmpBias", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Extended Command Information void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::StartVertexLocation, - "StartVertexLocation", - OCC::StartVertexLocation, - "startVertexLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StartInstanceLocation, - "StartInstanceLocation", - OCC::StartInstanceLocation, - "startInstanceLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery2, - "AllocateRayQuery2", - OCC::AllocateRayQuery2, - "allocateRayQuery2", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedA0, - "ReservedA0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA1, - "ReservedA1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA2, - "ReservedA2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Shader Execution Reordering 
void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::HitObject_MakeMiss, - "HitObject_MakeMiss", - OCC::HitObject_MakeMiss, - "hitObject_MakeMiss", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::HitObject_MakeNop, - "HitObject_MakeNop", - OCC::HitObject_MakeNop, - "hitObject_MakeNop", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB13, - "ReservedB13", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB14, - "ReservedB14", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB16, - "ReservedB16", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - 
Attribute::None, - }, - { - OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB28, - "ReservedB28", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB29, - "ReservedB29", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB30, - "ReservedB30", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC0, - "ReservedC0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC1, - "ReservedC1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC2, - "ReservedC2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC3, - "ReservedC3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC4, - "ReservedC4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC5, - "ReservedC5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC6, - "ReservedC6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC7, - "ReservedC7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC8, - "ReservedC8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC9, - "ReservedC9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // Temporary, indexable, input, output registers + {OC::TempRegLoad, + "TempRegLoad", + OCC::TempRegLoad, + "tempRegLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // 
Overloads: hfwi + {OC::TempRegStore, + "TempRegStore", + OCC::TempRegStore, + "tempRegStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::MinPrecXRegLoad, + "MinPrecXRegLoad", + OCC::MinPrecXRegLoad, + "minPrecXRegLoad", + Attribute::ReadOnly, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::MinPrecXRegStore, + "MinPrecXRegStore", + OCC::MinPrecXRegStore, + "minPrecXRegStore", + Attribute::None, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::LoadInput, + "LoadInput", + OCC::LoadInput, + "loadInput", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StoreOutput, + "StoreOutput", + OCC::StoreOutput, + "storeOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Unary float + {OC::FAbs, + "FAbs", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::Saturate, + "Saturate", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::IsNaN, + "IsNaN", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsInf, + "IsInf", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsFinite, + "IsFinite", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsNormal, + "IsNormal", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Cos, + "Cos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Sin, + "Sin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Tan, + "Tan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Acos, + "Acos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Asin, + "Asin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Atan, + "Atan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Hcos, + "Hcos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Hsin, + "Hsin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Htan, + "Htan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Exp, + "Exp", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Frc, + "Frc", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Log, + "Log", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Sqrt, + "Sqrt", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Rsqrt, + "Rsqrt", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Unary float - rounding + {OC::Round_ne, + "Round_ne", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_ni, + "Round_ni", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_pi, + "Round_pi", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + 
{{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_z, + "Round_z", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Unary int + {OC::Bfrev, + "Bfrev", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::Countbits, + "Countbits", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::FirstbitLo, + "FirstbitLo", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Unary uint + {OC::FirstbitHi, + "FirstbitHi", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Unary int + {OC::FirstbitSHi, + "FirstbitSHi", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary float + {OC::FMax, + "FMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::FMin, + "FMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + + // Binary int + {OC::IMax, + "IMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::IMin, + "IMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary uint + {OC::UMax, + "UMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::UMin, + "UMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary int with two outputs + {OC::IMul, + "IMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Binary uint with two outputs + {OC::UMul, + "UMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::UDiv, + "UDiv", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Binary uint with carry or borrow + {OC::UAddc, + "UAddc", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::USubb, + "USubb", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Tertiary float + {OC::FMad, + "FMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::Fma, + "Fma", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + + // Tertiary int + {OC::IMad, + "IMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Tertiary uint + {OC::UMad, + "UMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Tertiary int + {OC::Msad, + "Msad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + {OC::Ibfe, + "Ibfe", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + + // Tertiary uint + {OC::Ubfe, + "Ubfe", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + + // Quaternary + {OC::Bfi, + "Bfi", + OCC::Quaternary, + "quaternary", + 
Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Dot + {OC::Dot2, + "Dot2", + OCC::Dot2, + "dot2", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Dot3, + "Dot3", + OCC::Dot3, + "dot3", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Dot4, + "Dot4", + OCC::Dot4, + "dot4", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::CreateHandle, + "CreateHandle", + OCC::CreateHandle, + "createHandle", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::CBufferLoad, + "CBufferLoad", + OCC::CBufferLoad, + "cbufferLoad", + Attribute::ReadOnly, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + {OC::CBufferLoadLegacy, + "CBufferLoadLegacy", + OCC::CBufferLoadLegacy, + "cbufferLoadLegacy", + Attribute::ReadOnly, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfdwil + + // Resources - sample + {OC::Sample, + "Sample", + OCC::Sample, + "sample", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleBias, + "SampleBias", + OCC::SampleBias, + "sampleBias", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleLevel, + "SampleLevel", + OCC::SampleLevel, + "sampleLevel", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleGrad, + "SampleGrad", + OCC::SampleGrad, + "sampleGrad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleCmp, + "SampleCmp", + OCC::SampleCmp, + "sampleCmp", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleCmpLevelZero, + "SampleCmpLevelZero", + OCC::SampleCmpLevelZero, + "sampleCmpLevelZero", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::TextureLoad, + "TextureLoad", + OCC::TextureLoad, + "textureLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TextureStore, + "TextureStore", + OCC::TextureStore, + "textureStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferLoad, + "BufferLoad", + OCC::BufferLoad, + "bufferLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferStore, + "BufferStore", + OCC::BufferStore, + "bufferStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferUpdateCounter, + "BufferUpdateCounter", + OCC::BufferUpdateCounter, + "bufferUpdateCounter", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::CheckAccessFullyMapped, + "CheckAccessFullyMapped", + OCC::CheckAccessFullyMapped, + "checkAccessFullyMapped", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::GetDimensions, + "GetDimensions", + OCC::GetDimensions, + "getDimensions", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Resources - gather + {OC::TextureGather, + "TextureGather", + OCC::TextureGather, + "textureGather", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TextureGatherCmp, + "TextureGatherCmp", + OCC::TextureGatherCmp, + "textureGatherCmp", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Resources - sample + {OC::Texture2DMSGetSamplePosition, + "Texture2DMSGetSamplePosition", + OCC::Texture2DMSGetSamplePosition, + "texture2DMSGetSamplePosition", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::RenderTargetGetSamplePosition, + "RenderTargetGetSamplePosition", + OCC::RenderTargetGetSamplePosition, + 
"renderTargetGetSamplePosition", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::RenderTargetGetSampleCount, + "RenderTargetGetSampleCount", + OCC::RenderTargetGetSampleCount, + "renderTargetGetSampleCount", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Synchronization + {OC::AtomicBinOp, + "AtomicBinOp", + OCC::AtomicBinOp, + "atomicBinOp", + Attribute::None, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: li + {OC::AtomicCompareExchange, + "AtomicCompareExchange", + OCC::AtomicCompareExchange, + "atomicCompareExchange", + Attribute::None, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: li + {OC::Barrier, + "Barrier", + OCC::Barrier, + "barrier", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + + // Derivatives + {OC::CalculateLOD, + "CalculateLOD", + OCC::CalculateLOD, + "calculateLOD", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Pixel shader + {OC::Discard, + "Discard", + OCC::Discard, + "discard", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Derivatives + {OC::DerivCoarseX, + "DerivCoarseX", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivCoarseY, + "DerivCoarseY", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivFineX, + "DerivFineX", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivFineY, + "DerivFineY", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Pixel shader + {OC::EvalSnapped, + "EvalSnapped", + OCC::EvalSnapped, + "evalSnapped", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::EvalSampleIndex, + "EvalSampleIndex", + OCC::EvalSampleIndex, + "evalSampleIndex", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::EvalCentroid, + "EvalCentroid", + OCC::EvalCentroid, + "evalCentroid", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleIndex, + "SampleIndex", + OCC::SampleIndex, + "sampleIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::Coverage, + "Coverage", + OCC::Coverage, + "coverage", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::InnerCoverage, + "InnerCoverage", + OCC::InnerCoverage, + "innerCoverage", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Compute/Mesh/Amplification/Node shader + {OC::ThreadId, + "ThreadId", + OCC::ThreadId, + "threadId", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::GroupId, + "GroupId", + OCC::GroupId, + "groupId", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::ThreadIdInGroup, + "ThreadIdInGroup", + OCC::ThreadIdInGroup, + "threadIdInGroup", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::FlattenedThreadIdInGroup, + "FlattenedThreadIdInGroup", + OCC::FlattenedThreadIdInGroup, + "flattenedThreadIdInGroup", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Geometry shader + {OC::EmitStream, + "EmitStream", + OCC::EmitStream, + "emitStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::CutStream, + "CutStream", + OCC::CutStream, + "cutStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::EmitThenCutStream, + "EmitThenCutStream", + OCC::EmitThenCutStream, + "emitThenCutStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GSInstanceID, + 
"GSInstanceID", + OCC::GSInstanceID, + "gsInstanceID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Double precision + {OC::MakeDouble, + "MakeDouble", + OCC::MakeDouble, + "makeDouble", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + {OC::SplitDouble, + "SplitDouble", + OCC::SplitDouble, + "splitDouble", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + + // Domain and hull shader + {OC::LoadOutputControlPoint, + "LoadOutputControlPoint", + OCC::LoadOutputControlPoint, + "loadOutputControlPoint", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::LoadPatchConstant, + "LoadPatchConstant", + OCC::LoadPatchConstant, + "loadPatchConstant", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Domain shader + {OC::DomainLocation, + "DomainLocation", + OCC::DomainLocation, + "domainLocation", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Hull shader + {OC::StorePatchConstant, + "StorePatchConstant", + OCC::StorePatchConstant, + "storePatchConstant", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::OutputControlPointID, + "OutputControlPointID", + OCC::OutputControlPointID, + "outputControlPointID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Hull, Domain and Geometry shaders + {OC::PrimitiveID, + "PrimitiveID", + OCC::PrimitiveID, + "primitiveID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Other + {OC::CycleCounterLegacy, + "CycleCounterLegacy", + OCC::CycleCounterLegacy, + "cycleCounterLegacy", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Wave + {OC::WaveIsFirstLane, + "WaveIsFirstLane", + OCC::WaveIsFirstLane, + "waveIsFirstLane", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveGetLaneIndex, + "WaveGetLaneIndex", + OCC::WaveGetLaneIndex, + "waveGetLaneIndex", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::WaveGetLaneCount, + "WaveGetLaneCount", + OCC::WaveGetLaneCount, + "waveGetLaneCount", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::WaveAnyTrue, + "WaveAnyTrue", + OCC::WaveAnyTrue, + "waveAnyTrue", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveAllTrue, + "WaveAllTrue", + OCC::WaveAllTrue, + "waveAllTrue", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveActiveAllEqual, + "WaveActiveAllEqual", + OCC::WaveActiveAllEqual, + "waveActiveAllEqual", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveBallot, + "WaveActiveBallot", + OCC::WaveActiveBallot, + "waveActiveBallot", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveReadLaneAt, + "WaveReadLaneAt", + OCC::WaveReadLaneAt, + "waveReadLaneAt", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveReadLaneFirst, + "WaveReadLaneFirst", + OCC::WaveReadLaneFirst, + "waveReadLaneFirst", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveOp, + "WaveActiveOp", + OCC::WaveActiveOp, + "waveActiveOp", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveBit, + "WaveActiveBit", + OCC::WaveActiveBit, + "waveActiveBit", + Attribute::None, + 1, + {{0xf0}}, + {{0x0}}}, // Overloads: 8wil + {OC::WavePrefixOp, + "WavePrefixOp", + OCC::WavePrefixOp, + "wavePrefixOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + + // Quad Wave Ops + {OC::QuadReadLaneAt, + 
"QuadReadLaneAt", + OCC::QuadReadLaneAt, + "quadReadLaneAt", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::QuadOp, + "QuadOp", + OCC::QuadOp, + "quadOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + + // Bitcasts with different sizes + {OC::BitcastI16toF16, + "BitcastI16toF16", + OCC::BitcastI16toF16, + "bitcastI16toF16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF16toI16, + "BitcastF16toI16", + OCC::BitcastF16toI16, + "bitcastF16toI16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastI32toF32, + "BitcastI32toF32", + OCC::BitcastI32toF32, + "bitcastI32toF32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF32toI32, + "BitcastF32toI32", + OCC::BitcastF32toI32, + "bitcastF32toI32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastI64toF64, + "BitcastI64toF64", + OCC::BitcastI64toF64, + "bitcastI64toF64", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF64toI64, + "BitcastF64toI64", + OCC::BitcastF64toI64, + "bitcastF64toI64", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Legacy floating-point + {OC::LegacyF32ToF16, + "LegacyF32ToF16", + OCC::LegacyF32ToF16, + "legacyF32ToF16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyF16ToF32, + "LegacyF16ToF32", + OCC::LegacyF16ToF32, + "legacyF16ToF32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Double precision + {OC::LegacyDoubleToFloat, + "LegacyDoubleToFloat", + OCC::LegacyDoubleToFloat, + "legacyDoubleToFloat", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyDoubleToSInt32, + "LegacyDoubleToSInt32", + OCC::LegacyDoubleToSInt32, + "legacyDoubleToSInt32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyDoubleToUInt32, + "LegacyDoubleToUInt32", + OCC::LegacyDoubleToUInt32, + "legacyDoubleToUInt32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Wave + {OC::WaveAllBitCount, + "WaveAllBitCount", + OCC::WaveAllOp, + "waveAllOp", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WavePrefixBitCount, + "WavePrefixBitCount", + OCC::WavePrefixOp, + "wavePrefixOp", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Pixel shader + {OC::AttributeAtVertex, + "AttributeAtVertex", + OCC::AttributeAtVertex, + "attributeAtVertex", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfiw + + // Graphics shader + {OC::ViewID, + "ViewID", + OCC::ViewID, + "viewID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Resources + {OC::RawBufferLoad, + "RawBufferLoad", + OCC::RawBufferLoad, + "rawBufferLoad", + Attribute::ReadOnly, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfwidl + {OC::RawBufferStore, + "RawBufferStore", + OCC::RawBufferStore, + "rawBufferStore", + Attribute::None, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfwidl + + // Raytracing object space uint System Values + {OC::InstanceID, + "InstanceID", + OCC::InstanceID, + "instanceID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::InstanceIndex, + "InstanceIndex", + OCC::InstanceIndex, + "instanceIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Raytracing hit uint System Values + {OC::HitKind, + "HitKind", + OCC::HitKind, + "hitKind", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Raytracing uint System Values + {OC::RayFlags, + "RayFlags", + OCC::RayFlags, + 
"rayFlags", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Ray Dispatch Arguments + {OC::DispatchRaysIndex, + "DispatchRaysIndex", + OCC::DispatchRaysIndex, + "dispatchRaysIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::DispatchRaysDimensions, + "DispatchRaysDimensions", + OCC::DispatchRaysDimensions, + "dispatchRaysDimensions", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Ray Vectors + {OC::WorldRayOrigin, + "WorldRayOrigin", + OCC::WorldRayOrigin, + "worldRayOrigin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::WorldRayDirection, + "WorldRayDirection", + OCC::WorldRayDirection, + "worldRayDirection", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Ray object space Vectors + {OC::ObjectRayOrigin, + "ObjectRayOrigin", + OCC::ObjectRayOrigin, + "objectRayOrigin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::ObjectRayDirection, + "ObjectRayDirection", + OCC::ObjectRayDirection, + "objectRayDirection", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Ray Transforms + {OC::ObjectToWorld, + "ObjectToWorld", + OCC::ObjectToWorld, + "objectToWorld", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::WorldToObject, + "WorldToObject", + OCC::WorldToObject, + "worldToObject", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // RayT + {OC::RayTMin, + "RayTMin", + OCC::RayTMin, + "rayTMin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayTCurrent, + "RayTCurrent", + OCC::RayTCurrent, + "rayTCurrent", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // AnyHit Terminals + {OC::IgnoreHit, + "IgnoreHit", + OCC::IgnoreHit, + "ignoreHit", + Attribute::NoReturn, + 0, + {}, + {}}, // Overloads: v + {OC::AcceptHitAndEndSearch, + "AcceptHitAndEndSearch", + OCC::AcceptHitAndEndSearch, + "acceptHitAndEndSearch", + Attribute::NoReturn, + 0, + {}, + {}}, // Overloads: v + + // Indirect Shader Invocation + {OC::TraceRay, + "TraceRay", + OCC::TraceRay, + "traceRay", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::ReportHit, + "ReportHit", + OCC::ReportHit, + "reportHit", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::CallShader, + "CallShader", + OCC::CallShader, + "callShader", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Library create handle from resource struct (like HL intrinsic) + {OC::CreateHandleForLib, + "CreateHandleForLib", + OCC::CreateHandleForLib, + "createHandleForLib", + Attribute::ReadOnly, + 1, + {{0x200}}, + {{0x0}}}, // Overloads: o + + // Raytracing object space uint System Values + {OC::PrimitiveIndex, + "PrimitiveIndex", + OCC::PrimitiveIndex, + "primitiveIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Dot product with accumulate + {OC::Dot2AddHalf, + "Dot2AddHalf", + OCC::Dot2AddHalf, + "dot2AddHalf", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::Dot4AddI8Packed, + "Dot4AddI8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::Dot4AddU8Packed, + "Dot4AddU8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Wave + {OC::WaveMatch, + "WaveMatch", + OCC::WaveMatch, + "waveMatch", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, 
// Overloads: hfd8wil + {OC::WaveMultiPrefixOp, + "WaveMultiPrefixOp", + OCC::WaveMultiPrefixOp, + "waveMultiPrefixOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + {OC::WaveMultiPrefixBitCount, + "WaveMultiPrefixBitCount", + OCC::WaveMultiPrefixBitCount, + "waveMultiPrefixBitCount", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Mesh shader instructions + {OC::SetMeshOutputCounts, + "SetMeshOutputCounts", + OCC::SetMeshOutputCounts, + "setMeshOutputCounts", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::EmitIndices, + "EmitIndices", + OCC::EmitIndices, + "emitIndices", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GetMeshPayload, + "GetMeshPayload", + OCC::GetMeshPayload, + "getMeshPayload", + Attribute::ReadOnly, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::StoreVertexOutput, + "StoreVertexOutput", + OCC::StoreVertexOutput, + "storeVertexOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StorePrimitiveOutput, + "StorePrimitiveOutput", + OCC::StorePrimitiveOutput, + "storePrimitiveOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Amplification shader instructions + {OC::DispatchMesh, + "DispatchMesh", + OCC::DispatchMesh, + "dispatchMesh", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Sampler Feedback + {OC::WriteSamplerFeedback, + "WriteSamplerFeedback", + OCC::WriteSamplerFeedback, + "writeSamplerFeedback", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackBias, + "WriteSamplerFeedbackBias", + OCC::WriteSamplerFeedbackBias, + "writeSamplerFeedbackBias", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackLevel, + "WriteSamplerFeedbackLevel", + OCC::WriteSamplerFeedbackLevel, + "writeSamplerFeedbackLevel", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackGrad, + "WriteSamplerFeedbackGrad", + OCC::WriteSamplerFeedbackGrad, + "writeSamplerFeedbackGrad", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Inline Ray Query + {OC::AllocateRayQuery, + "AllocateRayQuery", + OCC::AllocateRayQuery, + "allocateRayQuery", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_TraceRayInline, + "RayQuery_TraceRayInline", + OCC::RayQuery_TraceRayInline, + "rayQuery_TraceRayInline", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_Proceed, + "RayQuery_Proceed", + OCC::RayQuery_Proceed, + "rayQuery_Proceed", + Attribute::None, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_Abort, + "RayQuery_Abort", + OCC::RayQuery_Abort, + "rayQuery_Abort", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommitNonOpaqueTriangleHit, + "RayQuery_CommitNonOpaqueTriangleHit", + OCC::RayQuery_CommitNonOpaqueTriangleHit, + "rayQuery_CommitNonOpaqueTriangleHit", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommitProceduralPrimitiveHit, + "RayQuery_CommitProceduralPrimitiveHit", + OCC::RayQuery_CommitProceduralPrimitiveHit, + "rayQuery_CommitProceduralPrimitiveHit", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommittedStatus, + "RayQuery_CommittedStatus", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateType, + "RayQuery_CandidateType", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: 
i + {OC::RayQuery_CandidateObjectToWorld3x4, + "RayQuery_CandidateObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateWorldToObject3x4, + "RayQuery_CandidateWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedObjectToWorld3x4, + "RayQuery_CommittedObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedWorldToObject3x4, + "RayQuery_CommittedWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, + "RayQuery_CandidateProceduralPrimitiveNonOpaque", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CandidateTriangleFrontFace, + "RayQuery_CandidateTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CommittedTriangleFrontFace, + "RayQuery_CommittedTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CandidateTriangleBarycentrics, + "RayQuery_CandidateTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedTriangleBarycentrics, + "RayQuery_CommittedTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_RayFlags, + "RayQuery_RayFlags", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_WorldRayOrigin, + "RayQuery_WorldRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_WorldRayDirection, + "RayQuery_WorldRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_RayTMin, + "RayQuery_RayTMin", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateTriangleRayT, + "RayQuery_CandidateTriangleRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedRayT, + "RayQuery_CommittedRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateInstanceIndex, + "RayQuery_CandidateInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateInstanceID, + "RayQuery_CandidateInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateGeometryIndex, + "RayQuery_CandidateGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + 
{{0x0}}}, // Overloads: i + {OC::RayQuery_CandidatePrimitiveIndex, + "RayQuery_CandidatePrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateObjectRayOrigin, + "RayQuery_CandidateObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateObjectRayDirection, + "RayQuery_CandidateObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedInstanceIndex, + "RayQuery_CommittedInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedInstanceID, + "RayQuery_CommittedInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedGeometryIndex, + "RayQuery_CommittedGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedPrimitiveIndex, + "RayQuery_CommittedPrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedObjectRayOrigin, + "RayQuery_CommittedObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedObjectRayDirection, + "RayQuery_CommittedObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Raytracing object space uint System Values, raytracing tier 1.1 + {OC::GeometryIndex, + "GeometryIndex", + OCC::GeometryIndex, + "geometryIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Inline Ray Query + {OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, + "RayQuery_CandidateInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, + "RayQuery_CommittedInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Get handle from heap + {OC::AnnotateHandle, + "AnnotateHandle", + OCC::AnnotateHandle, + "annotateHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateHandleFromBinding, + "CreateHandleFromBinding", + OCC::CreateHandleFromBinding, + "createHandleFromBinding", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateHandleFromHeap, + "CreateHandleFromHeap", + OCC::CreateHandleFromHeap, + "createHandleFromHeap", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Unpacking intrinsics + {OC::Unpack4x8, + "Unpack4x8", + OCC::Unpack4x8, + "unpack4x8", + Attribute::ReadNone, + 1, + {{0x60}}, + {{0x0}}}, // Overloads: iw + + // Packing intrinsics + {OC::Pack4x8, + "Pack4x8", + OCC::Pack4x8, + "pack4x8", + Attribute::ReadNone, + 1, + {{0x60}}, + {{0x0}}}, // Overloads: iw + + // Helper Lanes + {OC::IsHelperLane, + "IsHelperLane", + OCC::IsHelperLane, + "isHelperLane", + Attribute::ReadOnly, + 1, + {{0x8}}, + 
{{0x0}}}, // Overloads: 1 + + // Quad Wave Ops + {OC::QuadVote, + "QuadVote", + OCC::QuadVote, + "quadVote", + Attribute::None, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + + // Resources - gather + {OC::TextureGatherRaw, + "TextureGatherRaw", + OCC::TextureGatherRaw, + "textureGatherRaw", + Attribute::ReadOnly, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Resources - sample + {OC::SampleCmpLevel, + "SampleCmpLevel", + OCC::SampleCmpLevel, + "sampleCmpLevel", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::TextureStoreSample, + "TextureStoreSample", + OCC::TextureStoreSample, + "textureStoreSample", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + {OC::Reserved0, + "Reserved0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved1, + "Reserved1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved2, + "Reserved2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved3, + "Reserved3", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved4, + "Reserved4", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved5, + "Reserved5", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved6, + "Reserved6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved7, + "Reserved7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved8, + "Reserved8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved9, + "Reserved9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved10, + "Reserved10", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved11, + "Reserved11", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Create/Annotate Node Handles + {OC::AllocateNodeOutputRecords, + "AllocateNodeOutputRecords", + OCC::AllocateNodeOutputRecords, + "allocateNodeOutputRecords", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Get Pointer to Node Record in Address Space 6 + {OC::GetNodeRecordPtr, + "GetNodeRecordPtr", + OCC::GetNodeRecordPtr, + "getNodeRecordPtr", + Attribute::ReadNone, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Work Graph intrinsics + {OC::IncrementOutputCount, + "IncrementOutputCount", + OCC::IncrementOutputCount, + "incrementOutputCount", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::OutputComplete, + "OutputComplete", + OCC::OutputComplete, + "outputComplete", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GetInputRecordCount, + "GetInputRecordCount", + OCC::GetInputRecordCount, + "getInputRecordCount", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::FinishedCrossGroupSharing, + "FinishedCrossGroupSharing", + OCC::FinishedCrossGroupSharing, + "finishedCrossGroupSharing", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Synchronization + {OC::BarrierByMemoryType, + "BarrierByMemoryType", + OCC::BarrierByMemoryType, + "barrierByMemoryType", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + {OC::BarrierByMemoryHandle, + "BarrierByMemoryHandle", + OCC::BarrierByMemoryHandle, + "barrierByMemoryHandle", + 
Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + {OC::BarrierByNodeRecordHandle, + "BarrierByNodeRecordHandle", + OCC::BarrierByNodeRecordHandle, + "barrierByNodeRecordHandle", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + + // Create/Annotate Node Handles + {OC::CreateNodeOutputHandle, + "CreateNodeOutputHandle", + OCC::createNodeOutputHandle, + "createNodeOutputHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::IndexNodeHandle, + "IndexNodeHandle", + OCC::IndexNodeHandle, + "indexNodeHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::AnnotateNodeHandle, + "AnnotateNodeHandle", + OCC::AnnotateNodeHandle, + "annotateNodeHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateNodeInputRecordHandle, + "CreateNodeInputRecordHandle", + OCC::CreateNodeInputRecordHandle, + "createNodeInputRecordHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::AnnotateNodeRecordHandle, + "AnnotateNodeRecordHandle", + OCC::AnnotateNodeRecordHandle, + "annotateNodeRecordHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Work Graph intrinsics + {OC::NodeOutputIsValid, + "NodeOutputIsValid", + OCC::NodeOutputIsValid, + "nodeOutputIsValid", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::GetRemainingRecursionLevels, + "GetRemainingRecursionLevels", + OCC::GetRemainingRecursionLevels, + "getRemainingRecursionLevels", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Comparison Samples + {OC::SampleCmpGrad, + "SampleCmpGrad", + OCC::SampleCmpGrad, + "sampleCmpGrad", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleCmpBias, + "SampleCmpBias", + OCC::SampleCmpBias, + "sampleCmpBias", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Extended Command Information + {OC::StartVertexLocation, + "StartVertexLocation", + OCC::StartVertexLocation, + "startVertexLocation", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::StartInstanceLocation, + "StartInstanceLocation", + OCC::StartInstanceLocation, + "startInstanceLocation", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Inline Ray Query + {OC::AllocateRayQuery2, + "AllocateRayQuery2", + OCC::AllocateRayQuery2, + "allocateRayQuery2", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + {OC::ReservedA0, + "ReservedA0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedA1, + "ReservedA1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedA2, + "ReservedA2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB0, + "ReservedB0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB1, + "ReservedB1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB2, + "ReservedB2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Shader Execution Reordering + {OC::HitObject_MakeMiss, + "HitObject_MakeMiss", + OCC::HitObject_MakeMiss, + "hitObject_MakeMiss", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::HitObject_MakeNop, + "HitObject_MakeNop", + OCC::HitObject_MakeNop, + "hitObject_MakeNop", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + {OC::ReservedB5, + "ReservedB5", + OCC::Reserved, + "reserved", + 
Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB6, + "ReservedB6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB7, + "ReservedB7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB8, + "ReservedB8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB9, + "ReservedB9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB10, + "ReservedB10", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB11, + "ReservedB11", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB12, + "ReservedB12", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB13, + "ReservedB13", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB14, + "ReservedB14", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB15, + "ReservedB15", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB16, + "ReservedB16", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB17, + "ReservedB17", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB18, + "ReservedB18", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB19, + "ReservedB19", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB20, + "ReservedB20", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB21, + "ReservedB21", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB22, + "ReservedB22", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB23, + "ReservedB23", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB24, + "ReservedB24", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB25, + "ReservedB25", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB26, + "ReservedB26", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB27, + "ReservedB27", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB28, + "ReservedB28", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB29, + "ReservedB29", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB30, + "ReservedB30", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC0, + "ReservedC0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC1, + "ReservedC1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC2, + "ReservedC2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC3, + "ReservedC3", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + 
{OC::ReservedC4, + "ReservedC4", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC5, + "ReservedC5", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC6, + "ReservedC6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC7, + "ReservedC7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC8, + "ReservedC8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC9, + "ReservedC9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v }; // OPCODE-OLOADS:END -const char *OP::m_OverloadTypeName[kNumTypeOverloads] = { - "void", "f16", "f32", "f64", "i1", "i8", - "i16", "i32", "i64", "udt", "obj", // These should not be used -}; +const char *OP::m_OverloadTypeName[TS_BasicCount] = { + "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64"}; const char *OP::m_NamePrefix = "dx.op."; const char *OP::m_TypePrefix = "dx.types."; @@ -3040,82 +2654,110 @@ unsigned OP::GetTypeSlot(Type *pType) { Type::TypeID T = pType->getTypeID(); switch (T) { case Type::VoidTyID: - return 0; + return TS_Invalid; case Type::HalfTyID: - return 1; + return TS_F16; case Type::FloatTyID: - return 2; + return TS_F32; case Type::DoubleTyID: - return 3; + return TS_F64; case Type::IntegerTyID: { IntegerType *pIT = dyn_cast(pType); unsigned Bits = pIT->getBitWidth(); switch (Bits) { case 1: - return 4; + return TS_I1; case 8: - return 5; + return TS_I8; case 16: - return 6; + return TS_I16; case 32: - return 7; + return TS_I32; case 64: - return 8; + return TS_I64; } llvm_unreachable("Invalid Bits size"); + return TS_Invalid; } case Type::PointerTyID: { pType = cast(pType)->getElementType(); if (pType->isStructTy()) - return kUserDefineTypeSlot; + return TS_UDT; DXASSERT(!pType->isPointerTy(), "pointer-to-pointer type unsupported"); return GetTypeSlot(pType); } case Type::StructTyID: - return kObjectTypeSlot; + // Named struct value (not pointer) indicates a built-in object type. + // Anonymous struct value is used to wrap multi-overload dimensions. 
+ if (cast(pType)->hasName()) + return TS_Object; + else + return TS_Extended; + case Type::VectorTyID: + return TS_Vector; default: break; } - return UINT_MAX; + return TS_Invalid; } const char *OP::GetOverloadTypeName(unsigned TypeSlot) { - DXASSERT(TypeSlot < kUserDefineTypeSlot, "otherwise caller passed OOB index"); + DXASSERT(TypeSlot < TS_BasicCount, "otherwise caller passed OOB index"); return m_OverloadTypeName[TypeSlot]; } -llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) { +StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { + DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); - if (TypeSlot < kUserDefineTypeSlot) { + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == kUserDefineTypeSlot) { + } else if (TypeSlot == TS_UDT) { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast(Ty); return ST->getStructName(); - } else if (TypeSlot == kObjectTypeSlot) { + } else if (TypeSlot == TS_Object) { StructType *ST = cast(Ty); return ST->getStructName(); + } else if (TypeSlot == TS_Vector) { + VectorType *VecTy = cast(Ty); + return (Twine("v") + Twine(VecTy->getNumElements()) + + Twine( + GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + } else if (TypeSlot == TS_Extended) { + DXASSERT(isa(Ty), + "otherwise, extended overload type not wrapped in struct type."); + StructType *ST = cast(Ty); + DXASSERT(ST->getNumElements() <= DXIL::kDxilMaxOloadDims, + "otherwise, extended overload has too many dimensions."); + // Iterate extended slots, recurse, separate with '.' + raw_svector_ostream OS(Storage); + for (unsigned I = 0; I < ST->getNumElements(); ++I) { + if (I > 0) + OS << "."; + SmallVector TempStr; + OS << GetTypeName(ST->getElementType(I), TempStr); + } + return OS.str(); } else { - raw_string_ostream os(str); - Ty->print(os); - os.flush(); - return str; + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } } -llvm::StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, - std::string &funcNameStorage) { +StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, + SmallVectorImpl &Storage) { if (Ty == Type::getVoidTy(Ty->getContext())) { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))).str(); + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))) + .toStringRef(Storage); } else { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + - GetTypeName(Ty, funcNameStorage)) - .str(); + llvm::SmallVector TempStr; + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + + GetTypeName(Ty, TempStr)) + .toStringRef(Storage); } - return funcNameStorage; } const char *OP::GetOpCodeName(OpCode opCode) { @@ -3143,13 +2785,41 @@ llvm::Attribute::AttrKind OP::GetMemAccessAttr(OpCode opCode) { } bool OP::IsOverloadLegal(OpCode opCode, Type *pType) { - if (!pType) + if (static_cast(opCode) >= + static_cast(OpCode::NumOpCodes)) return false; - if (opCode == OpCode::NumOpCodes) + if (!pType) return false; - unsigned TypeSlot = GetTypeSlot(pType); - return TypeSlot != UINT_MAX && - m_OpCodeProps[(unsigned)opCode].bAllowOverload[TypeSlot]; + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + + if (OpProps.NumOverloadDims == 0) + return pType->isVoidTy(); + + // Normalize 1+ overload dimensions into array. 
+ Type *Types[DXIL::kDxilMaxOloadDims] = {pType}; + if (OpProps.NumOverloadDims > 1) { + StructType *ST = dyn_cast(pType); + // Make sure multi-overload is well-formed. + if (!ST || ST->hasName() || ST->getNumElements() != OpProps.NumOverloadDims) + return false; + for (unsigned I = 0; I < ST->getNumElements(); ++I) + Types[I] = ST->getElementType(I); + } + + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) { + Type *Ty = Types[I]; + unsigned TypeSlot = GetTypeSlot(Ty); + if (!OpProps.AllowedOverloads[I][TypeSlot]) + return false; + if (TypeSlot == TS_Vector) { + unsigned EltTypeSlot = + GetTypeSlot(cast(Ty)->getElementType()); + if (!OpProps.AllowedVectorElements[I][EltTypeSlot]) + return false; + } + } + + return true; } bool OP::CheckOpCodeTable() { @@ -3173,41 +2843,6 @@ bool OP::IsDxilOpFunc(const llvm::Function *F) { return IsDxilOpFuncName(F->getName()); } -bool OP::IsDxilOpTypeName(StringRef name) { - return name.startswith(m_TypePrefix) || name.startswith(m_MatrixTypePrefix); -} - -bool OP::IsDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - return IsDxilOpTypeName(Name); -} - -bool OP::IsDupDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - if (!IsDxilOpTypeName(Name)) - return false; - size_t DotPos = Name.rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || Name.back() == '.' || - !isdigit(static_cast(Name[DotPos + 1]))) - return false; - return true; -} - -StructType *OP::GetOriginalDxilOpType(llvm::StructType *ST, llvm::Module &M) { - DXASSERT(IsDupDxilOpType(ST), "else should not call GetOriginalDxilOpType"); - StringRef Name = ST->getName(); - size_t DotPos = Name.rfind('.'); - StructType *OriginalST = M.getTypeByName(Name.substr(0, DotPos)); - DXASSERT(OriginalST, "else name collison without original type"); - DXASSERT(ST->isLayoutIdentical(OriginalST), - "else invalid layout for dxil types"); - return OriginalST; -} - bool OP::IsDxilOpFuncCallInst(const llvm::Instruction *I) { const CallInst *CI = dyn_cast(I); if (CI == nullptr) @@ -3297,6 +2932,12 @@ bool OP::IsDxilOpBarrier(OpCode C) { // OPCODE-BARRIER:END } +bool OP::IsDxilOpExtendedOverload(OpCode C) { + if (C >= OpCode::NumOpCodes) + return false; + return m_OpCodeProps[static_cast(C)].NumOverloadDims > 1; +} + static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags, unsigned allowedMask) { // If the memory type is AllMemory, masking inapplicable flags is allowed. 
@@ -3945,13 +3586,12 @@ void OP::FixOverloadNames() { if (F.isDeclaration() && OP::IsDxilOpFunc(&F) && !F.user_empty()) { CallInst *CI = cast(*F.user_begin()); DXIL::OpCode opCode = OP::GetDxilOpFuncCallInst(CI); + if (!MayHaveNonCanonicalOverload(opCode)) + continue; llvm::Type *Ty = OP::GetOverloadType(opCode, &F); if (!OP::IsOverloadLegal(opCode, Ty)) continue; - if (!isa(Ty) && !isa(Ty)) - continue; - - std::string funcName; + SmallVector funcName; if (OP::ConstructOverloadName(Ty, opCode, funcName) .compare(F.getName()) != 0) F.setName(funcName); @@ -3964,11 +3604,54 @@ void OP::UpdateCache(OpCodeClass opClass, Type *Ty, llvm::Function *F) { m_FunctionToOpClass[F] = opClass; } +bool OP::MayHaveNonCanonicalOverload(OpCode OC) { + if (OC >= OpCode::NumOpCodes) + return false; + const unsigned CheckMask = (1 << TS_UDT) | (1 << TS_Object); + auto &OpProps = m_OpCodeProps[static_cast(OC)]; + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) + if ((CheckMask & OpProps.AllowedOverloads[I].SlotMask) != 0) + return true; + return false; +} + +Function *OP::GetOpFunc(OpCode OC, ArrayRef OverloadTypes) { + if (OC >= OpCode::NumOpCodes) + return nullptr; + if (OverloadTypes.size() != + m_OpCodeProps[static_cast(OC)].NumOverloadDims) { + llvm_unreachable("incorrect overload dimensions"); + return nullptr; + } + if (OverloadTypes.size() == 0) { + return GetOpFunc(OC, Type::getVoidTy(m_Ctx)); + } else if (OverloadTypes.size() == 1) { + return GetOpFunc(OC, OverloadTypes[0]); + } + return GetOpFunc(OC, GetExtendedOverloadType(OverloadTypes)); +} + Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { - if (opCode == OpCode::NumOpCodes) + if (opCode >= OpCode::NumOpCodes) return nullptr; if (!pOverloadType) return nullptr; + + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + if (IsDxilOpExtendedOverload(opCode)) { + // Make sure pOverloadType is well formed for an extended overload. + StructType *ST = dyn_cast(pOverloadType); + DXASSERT(ST != nullptr, + "otherwise, extended overload type is not a struct"); + if (ST == nullptr) + return nullptr; + bool EltCountValid = ST->getNumElements() == OpProps.NumOverloadDims; + DXASSERT(EltCountValid, + "otherwise, incorrect type count for extended overload."); + if (!EltCountValid) + return nullptr; + } + // Illegal overloads are generated and eliminated by DXIL op constant // evaluation for a number of cases where a double overload of an HL intrinsic // that otherwise does not support double is used for literal values, when @@ -3976,7 +3659,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Illegal overloads of DXIL intrinsics may survive through to final DXIL, // but these will be caught by the validator, and this is not a regression. 
- OpCodeClass opClass = m_OpCodeProps[(unsigned)opCode].opCodeClass; + OpCodeClass opClass = OpProps.opCodeClass; Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[pOverloadType]; if (F != nullptr) { @@ -3984,7 +3667,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - vector ArgTypes; // RetType is ArgTypes[0] + SmallVector ArgTypes; // RetType is ArgTypes[0] Type *pETy = pOverloadType; Type *pRes = GetHandleType(); Type *pNodeHandle = GetNodeHandleType(); @@ -4020,7 +3703,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define A(_x) ArgTypes.emplace_back(_x) #define RRT(_y) A(GetResRetType(_y)) #define CBRT(_y) A(GetCBufferRetType(_y)) -#define VEC4(_y) A(GetVectorType(4, _y)) +#define VEC4(_y) A(GetStructVectorType(4, _y)) + +// Extended Overload types are wrapped in an anonymous struct +#define EXT(_y) A(cast(pOverloadType)->getElementType(_y)) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -6066,14 +5752,15 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { pFT = FunctionType::get( ArgTypes[0], ArrayRef(&ArgTypes[1], ArgTypes.size() - 1), false); - std::string funcName; - ConstructOverloadName(pOverloadType, opCode, funcName); + SmallVector FuncStorage; + StringRef FuncName = + ConstructOverloadName(pOverloadType, opCode, FuncStorage); // Try to find existing function with the same name in the module. // This needs to happen after the switch statement that constructs arguments // and return values to ensure that ResRetType is constructed in the // RefreshCache case. - if (Function *existF = m_pModule->getFunction(funcName)) { + if (Function *existF = m_pModule->getFunction(FuncName)) { if (existF->getFunctionType() != pFT) return nullptr; F = existF; @@ -6081,13 +5768,13 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - F = cast(m_pModule->getOrInsertFunction(funcName, pFT)); + F = cast(m_pModule->getOrInsertFunction(FuncName, pFT)); UpdateCache(opClass, pOverloadType, F); F->setCallingConv(CallingConv::C); F->addFnAttr(Attribute::NoUnwind); - if (m_OpCodeProps[(unsigned)opCode].FuncAttr != Attribute::None) - F->addFnAttr(m_OpCodeProps[(unsigned)opCode].FuncAttr); + if (OpProps.FuncAttr != Attribute::None) + F->addFnAttr(OpProps.FuncAttr); return F; } @@ -6494,62 +6181,91 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { + if (!Ty->isStructTy()) + return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) return true; } - return false; + // Check for vector overload which isn't cached in m_pResRetType. 
+ StructType *ST = cast(Ty); + if (!ST->hasName() || ST->getNumElements() < 2 || + !ST->getElementType(0)->isVectorTy()) + return false; + return Ty == GetResRetType(ST->getElementType(0)); } Type *OP::GetResRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); - if (m_pResRetType[TypeSlot] == nullptr) { - string TypeName("dx.types.ResRet."); - TypeName += GetOverloadTypeName(TypeSlot); - Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, - pOverloadType, Type::getInt32Ty(m_Ctx)}; - m_pResRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + if (TypeSlot < TS_BasicCount) { + if (m_pResRetType[TypeSlot] == nullptr) { + SmallVector Storage; + StringRef TypeName = + (Twine("dx.types.ResRet.") + Twine(GetOverloadTypeName(TypeSlot))) + .toStringRef(Storage); + Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, + pOverloadType, Type::getInt32Ty(m_Ctx)}; + m_pResRetType[TypeSlot] = + GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } + return m_pResRetType[TypeSlot]; + } else if (TypeSlot == TS_Vector) { + SmallVector Storage; + VectorType *VecTy = cast(pOverloadType); + StringRef TypeName = + (Twine("dx.types.ResRet.v") + Twine(VecTy->getNumElements()) + + Twine(GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + Type *FieldTypes[2] = {pOverloadType, Type::getInt32Ty(m_Ctx)}; + return GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); } - return m_pResRetType[TypeSlot]; + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; } Type *OP::GetCBufferRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); + if (TypeSlot >= TS_BasicCount) { + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; + } + if (m_pCBufferRetType[TypeSlot] == nullptr) { DXASSERT(m_LowPrecisionMode != DXIL::LowPrecisionMode::Undefined, "m_LowPrecisionMode must be set before constructing type."); - string TypeName("dx.types.CBufRet."); - TypeName += GetOverloadTypeName(TypeSlot); + SmallVector Storage; + raw_svector_ostream OS(Storage); + OS << "dx.types.CBufRet."; + OS << GetOverloadTypeName(TypeSlot); Type *i64Ty = Type::getInt64Ty(pOverloadType->getContext()); Type *i16Ty = Type::getInt16Ty(pOverloadType->getContext()); if (pOverloadType->isDoubleTy() || pOverloadType == i64Ty) { Type *FieldTypes[2] = {pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else if (!UseMinPrecision() && (pOverloadType->isHalfTy() || pOverloadType == i16Ty)) { - TypeName += ".8"; // dx.types.CBufRet.fp16.8 for buffer of 8 halves + OS << ".8"; // dx.types.CBufRet.f16.8 for buffer of 8 halves Type *FieldTypes[8] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, }; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else { Type *FieldTypes[4] = {pOverloadType, pOverloadType, pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } } return m_pCBufferRetType[TypeSlot]; } -Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { +Type 
*OP::GetStructVectorType(unsigned numElements, Type *pOverloadType) { if (numElements == 4) { if (pOverloadType == Type::getInt32Ty(pOverloadType->getContext())) { return m_pFourI32Type; @@ -6561,6 +6277,10 @@ Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { return nullptr; } +StructType *OP::GetExtendedOverloadType(ArrayRef OverloadTypes) { + return StructType::get(m_Ctx, OverloadTypes); +} + //------------------------------------------------------------------------------ // // LLVM utility methods. diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 4622256dfe..cac074adc3 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2037,7 +2037,7 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty->getScalarType()); + dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); } if (!dxilFunc) { @@ -2109,17 +2109,20 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { return true; unsigned EltNum = ST->getNumElements(); + Type *EltTy = ST->getElementType(0); switch (EltNum) { case 2: + // Check if it's a native vector resret. + if (EltTy->isVectorTy()) + return ST == hlslOP->GetResRetType(EltTy); + LLVM_FALLTHROUGH; case 4: - case 8: { // 2 for doubles, 8 for halfs. - Type *EltTy = ST->getElementType(0); + case 8: // 2 for doubles, 8 for halfs. return ST == hlslOP->GetCBufferRetType(EltTy); - } break; - case 5: { - Type *EltTy = ST->getElementType(0); + break; + case 5: return ST == hlslOP->GetResRetType(EltTy); - } break; + break; default: return false; } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index e32ab1915a..05bc7d472d 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -37,6 +37,30 @@ "array_local_ldst", ] +# These are the valid overload type characters for DXIL instructions. +# - "v" is for void, and can only be used alone. +# - "u" is for user defined type (UDT), and is mutually exclusive with the other +# types. +# - "o" is for an HLSL object type (e.g. Texture, Sampler, etc.), and is +# mutually exclusive with the other types. +# - "<" is for vector overloads, and may be followed by a set of supported +# component types. +# - If "<" is not followed by any component types, any preceding scalar types +# are used. +# - Vector component types are captured into a separate list during +# processing. +# - "," is used to separate multiple overload dimensions. +# - When used, only $x0, $x1, etc. are supported for overloaded parameter +# types. +# dxil_all_user_oload_chars must be kept in sync with the indices in +# hlsl::OP::TypeSlot in DxilOperations.h. +dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_scalar_oload_chars = "hfd18wil" + +# Maximum number of overload dimensions supported through the extended overload +# in DXIL instructions. +dxil_max_overload_dims = 2 + class db_dxil_enum_value(object): "A representation for a value in an enumeration type" @@ -81,6 +105,7 @@ def __init__(self, name, **kwargs): self.ops = [] # the operands that this instruction takes self.is_allowed = True # whether this instruction is allowed in a DXIL program self.oload_types = "" # overload types if applicable + # Always call process_oload_types() after setting oload_types. 
self.fn_attr = "" # attribute shorthands: rn=does not access memory,ro=only reads from memory, self.is_deriv = False # whether this is some kind of derivative self.is_gradient = False # whether this requires a gradient calculation @@ -98,6 +123,9 @@ def __init__(self, name, **kwargs): self.is_reserved = self.dxil_class == "Reserved" self.shader_model_translated = () # minimum shader model required with translation by linker self.props = {} # extra properties + self.num_oloads = 0 # number of overloads for this instruction + if self.is_dxil_op: + self.process_oload_types() def __str__(self): return self.name @@ -105,6 +133,127 @@ def __str__(self): def fully_qualified_name(self): return "{}::{}".format(self.fully_qualified_name_prefix, self.name) + def process_oload_types(self): + if type(self.oload_types) is not str: + raise ValueError( + f"overload for '{self.name}' should be a string - use empty if n/a" + ) + # Early out for LLVM instructions + if not self.is_dxil_op: + return + + self.num_oloads = 0 + + # Early out for void overloads. + if self.oload_types == "v": + return + + if self.oload_types == "": + raise ValueError( + f"overload for '{self.name}' should not be empty - use void if n/a" + ) + if "v" in self.oload_types: + raise ValueError( + f"void overload should be exclusive to other types for '({self.name})'" + ) + + # Process oload_types for extended and vector overloads. + # Contrived example: "hf<, dxil_max_overload_dims: + raise ValueError( + "Too many overload dimensions for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + + def check_duplicate_overloads(oloads): + if len(oloads) != len(set(oloads)): + raise ValueError( + "Duplicate overload types specified for DXIL op " + f"{self.name}: '{oloads}' in '{self.oload_types}'" + ) + + def check_overload_chars(oloads, valid_chars): + invalid_chars = set(oloads).difference(set(valid_chars)) + if invalid_chars: + raise ValueError( + "Invalid overload type character(s) used for DXIL op " + f"{self.name}: '{invalid_chars}' in '{oloads}' from " + f"'{self.oload_types}'" + ) + + for n, oloads in enumerate(oload_types): + if len(oloads) == 0: + raise ValueError( + f"Invalid empty overload type for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + check_overload_chars(oloads, dxil_all_user_oload_chars) + + # split at vector for component overloads, if vector specified + # without following components, use the scalar overloads that + # precede the vector character. + split = oloads.split("<") + if len(split) == 1: + # No vector overload. + continue + elif len(split) != 2: + raise ValueError( + f"Invalid vector overload for DXIL op {self.name}: " + f"{oloads} in '{self.oload_types}'" + ) + + # Split into scalar and vector component overloads. + scalars, vector_oloads = split + check_duplicate_overloads(scalars) + if not vector_oloads: + vector_oloads = scalars + else: + check_duplicate_overloads(vector_oloads) + if not vector_oloads: + raise ValueError( + "No scalar overload types provided with vector overload " + f"for DXIL op {self.name}: '{self.oload_types}'" + ) + check_overload_chars(vector_oloads, dxil_scalar_oload_chars) + oload_types[n] = scalars + "<" + vector_oloads + # Reconstruct overload string with default vector overloads. + self.oload_types = ",".join(oload_types) + self.check_extended_oload_ops() + + def check_extended_oload_ops(self): + "Ensure ops has sequential extended overload references with $x0, $x1, etc." 
+ if self.num_oloads < 2: + return + next_oload_idx = 0 + for i in self.ops: + if i.llvm_type.startswith("$x"): + if i.llvm_type != "$x" + str(next_oload_idx): + raise ValueError( + "Extended overloads are not sequentially referenced in " + f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" + ) + next_oload_idx += 1 + if next_oload_idx != self.num_oloads: + raise ValueError( + "Extended overloads are not referenced for all overload " + f"dimensions in DXIL op {self.name}: {next_oload_idx} != " + f"{self.num_oloads}" + ) + class db_dxil_metadata(object): "A representation for a metadata record" @@ -477,9 +626,7 @@ def populate_categories_and_models(self): "closesthit", ) for i in "GeometryIndex".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Raytracing object space uint System Values, raytracing tier 1.1" ) self.name_idx[i].shader_model = 6, 5 @@ -574,9 +721,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 3 self.name_idx[i].shader_stages = ("library", "intersection") for i in "CreateHandleForLib".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Library create handle from resource struct (like HL intrinsic)" ) self.name_idx[i].shader_model = 6, 3 @@ -5652,18 +5797,6 @@ def UFI(name, **mappings): ) for i in self.instr: self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) - for i in self.instr: - if i.is_dxil_op: - assert i.oload_types != "", ( - "overload for DXIL operation %s should not be empty - use void if n/a" - % (i.name) - ) - assert i.oload_types == "v" or i.oload_types.find("v") < 0, ( - "void overload should be exclusive to other types (%s)" % i.name - ) - assert ( - type(i.oload_types) is str - ), "overload for %s should be a string - use empty if n/a" % (i.name) # Verify that all operations in each class have the same signature. import itertools @@ -8391,6 +8524,7 @@ def __init__( self.template_id_idx = template_id_idx # Template ID numeric value self.component_id_idx = component_id_idx # Component ID numeric value + class db_hlsl(object): "A database of HLSL language data" diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 4580e6c12c..f0d8b0ebae 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -40,8 +40,10 @@ def get_hlsl_opcode_data(): g_hlsl_opcode_data = {} return g_hlsl_opcode_data + g_db_hlsl = None + def get_db_hlsl(): global g_db_hlsl if g_db_hlsl is None: @@ -51,6 +53,10 @@ def get_db_hlsl(): return g_db_hlsl +def get_max_oload_dims(): + return f"const unsigned kDxilMaxOloadDims = {dxil_max_overload_dims};" + + def format_comment(prefix, val): "Formats a value with a line-comment prefix." 
result = "" @@ -507,26 +513,15 @@ def print_opfunc_props(self): OP=self.OP ) ) - print( - "// OpCode OpCode name, OpCodeClass OpCodeClass name, void, h, f, d, i1, i8, i16, i32, i64, udt, obj, function attribute" - ) - # Example formatted string: - # { OC::TempRegLoad, "TempRegLoad", OCC::TempRegLoad, "tempRegLoad", false, true, true, false, true, false, true, true, false, Attribute::ReadOnly, }, - # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 - # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 last_category = None - # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong, u(dt) - f = lambda i, c: "true" if i.oload_types.find(c) >= 0 else "false" lower_exceptions = { "CBufferLoad": "cbufferLoad", "CBufferLoadLegacy": "cbufferLoadLegacy", "GSInstanceID": "gsInstanceID", } - lower_fn = ( - lambda t: lower_exceptions[t] - if t in lower_exceptions - else t[:1].lower() + t[1:] + lower_fn = lambda t: ( + lower_exceptions[t] if t in lower_exceptions else t[:1].lower() + t[1:] ) attr_dict = { "": "None", @@ -537,35 +532,47 @@ def print_opfunc_props(self): "nr": "NoReturn", "wv": "None", } - attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + "," + attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + oload_to_mask = lambda oload: sum( + [1 << dxil_all_user_oload_chars.find(c) for c in oload] + ) + oloads_fn = lambda oloads: ( + "{" + ",".join(["{0x%x}" % m for m in oloads]) + "}" + ) for i in self.instrs: if last_category != i.category: if last_category != None: print("") - print( - " // {category:118} void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute".format( - category=i.category - ) - ) + if not i.is_reserved: + print(f" // {i.category}") last_category = i.category + scalar_masks = [] + vector_masks = [] + if i.num_oloads > 0: + for n, o in enumerate(i.oload_types.split(",")): + if "<" in o: + v = o.split("<") + scalar_masks.append(oload_to_mask(v[0] + "<")) + vector_masks.append(oload_to_mask(v[1])) + else: + scalar_masks.append(oload_to_mask(o)) + vector_masks.append(0) print( - " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} {classNameQuot:28} {{{v:>6},{h:>6},{f:>6},{d:>6},{b:>6},{e:>6},{w:>6},{i:>6},{l:>6},{u:>6},{o:>6}}}, {attr:20} }},".format( + ( + " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} " + + "{classNameQuot:28} {attr:20}, {num_oloads}, " + + "{scalar_masks:16}, {vector_masks:16} }}, " + + "// Overloads: {oloads}" + ).format( name=i.name + ",", quotName='"' + i.name + '",', className=i.dxil_class + ",", classNameQuot='"' + lower_fn(i.dxil_class) + '",', - v=f(i, "v"), - h=f(i, "h"), - f=f(i, "f"), - d=f(i, "d"), - b=f(i, "1"), - e=f(i, "8"), - w=f(i, "w"), - i=f(i, "i"), - l=f(i, "l"), - u=f(i, "u"), - o=f(i, "o"), attr=attr_fn(i), + num_oloads=i.num_oloads, + scalar_masks=oloads_fn(scalar_masks), + vector_masks=oloads_fn(vector_masks), + oloads=i.oload_types, OC=self.OC, OCC=self.OCC, ) @@ -621,6 +628,9 @@ def print_opfunc_table(self): "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", "hit_object": "A(pHit);", + # Extended overload slots, extend as needed: + "$x0": "EXT(0);", + "$x1": "EXT(1);", } last_category = None for i in self.instrs: @@ -651,14 +661,24 @@ def print_opfunc_oload_type(self): obj_ty = "obj" vec_ty = "$vec" gsptr_ty = "$gsptr" + extended_ty = "$x" last_category = None 
index_dict = collections.OrderedDict() ptr_index_dict = collections.OrderedDict() single_dict = collections.OrderedDict() + # extended_dict collects overloads with multiple overload types + # grouped by the set of overload parameter indices. + extended_dict = collections.OrderedDict() struct_list = [] + extended_list = [] for instr in self.instrs: + if instr.num_oloads > 1: + # Process extended overloads separately. + extended_list.append(instr) + continue + ret_ty = instr.ops[0].llvm_type # Skip case return type is overload type if ret_ty == elt_ty: @@ -730,8 +750,7 @@ def print_opfunc_oload_type(self): "i": "IntegerType::get(Ctx, 32)", "l": "IntegerType::get(Ctx, 64)", "v": "Type::getVoidTy(Ctx)", - "u": "Type::getInt32PtrTy(Ctx)", - "o": "Type::getInt32PtrTy(Ctx)", + # No other types should be referenced here. } assert ty in type_code_texts, "llvm type %s is unknown" % (ty) ty_code = type_code_texts[ty] @@ -791,6 +810,61 @@ def print_opfunc_oload_type(self): line = line + "}" print(line) + for instr in extended_list: + # Collect indices for overloaded return and types, make a tuple of + # indices the key, and add the opcode to a list of opcodes for that + # key. Indices start with 0 for return type, and 1 for the first + # function parameter, which is the DXIL OpCode. + indices = [] + for index, op in enumerate(instr.ops): + # Skip dxil opcode. + if op.pos == 1: + continue + + op_type = op.llvm_type + if op_type.startswith(extended_ty): + try: + extended_index = int(op_type[2:]) + except: + raise ValueError( + "Error parsing extended operand type " + + f"'{op_type}' for DXIL op '{instr.name}'" + ) + if extended_index != len(indices): + raise ValueError( + f"'$x{extended_index}' is not in sequential " + + f"order for DXIL op '{instr.name}'" + ) + indices.append(op.pos) + + if len(indices) != instr.num_oloads: + raise ValueError( + f"DXIL op {instr.name}: extended overload count " + + "mismatches the number of overload types" + ) + extended_dict.setdefault(tuple(indices), []).append(instr.name) + + def get_type_at_index(index): + if index == 0: + return "FT->getReturnType()" + return f"FT->getParamType({index - 1})" + + for index_tuple, opcodes in extended_dict.items(): + line = "" + for opcode in opcodes: + line = line + f"case OpCode::{opcode}:\n" + if index_tuple[-1] > 0: + line += ( + f" if (FT->getNumParams() < {index_tuple[-1]})\n" + + " return nullptr;\n" + ) + line += ( + " return llvm::StructType::get(Ctx, {" + + ", ".join([get_type_at_index(index) for index in index_tuple]) + + "});\n" + ) + print(line) + class db_valfns_gen: "A generator of validation functions." @@ -1599,6 +1673,7 @@ def get_highest_released_shader_model(): ) return result + def get_highest_shader_model(): result = """static const unsigned kHighestMajor = %d; static const unsigned kHighestMinor = %d;""" % ( @@ -1607,6 +1682,7 @@ def get_highest_shader_model(): ) return result + def get_dxil_version_minor(): return "const unsigned kDxilMinor = %d;" % highest_minor From a13938dd6bcd08b12ef086c834c35859f050ff3f Mon Sep 17 00:00:00 2001 From: Jeff Noyle Date: Tue, 1 Apr 2025 12:55:48 -0700 Subject: [PATCH 59/88] PIX: Check for existing PIX UAV in roots sigs before adding it again (#7238) The DXR invocation counting pass calls a function to add an output UAV twice. As part of adding the UAV, any DXIL-defined rootsigs will be extended to include this new UAV. If the UAV already exists in the rootsig, we should not add it again. (Doing so results in root sig that will fail validation.) 
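In essence, the fix adds an early-out to the templated ExtendRootSig helper before it allocates the enlarged parameter array. The following is only a simplified sketch of that guard, using the names already present in PixPassHelpers.cpp (toolsRegisterSpace, toolsUAVRegister, DxilRootParameterType::UAV); the actual change appears in the diff below:

    template <typename RootSigDesc, typename RootParameterDesc>
    void ExtendRootSig(RootSigDesc &rootSigDesc) {
      // If a previous call already appended the PIX output UAV at
      // (toolsRegisterSpace, toolsUAVRegister), do not append it again.
      for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) {
        const auto &Param = rootSigDesc.pParameters[i];
        if (Param.ParameterType == DxilRootParameterType::UAV &&
            Param.Descriptor.RegisterSpace == toolsRegisterSpace &&
            Param.Descriptor.ShaderRegister == toolsUAVRegister)
          return; // Root signature already contains the tools UAV.
      }
      // Otherwise allocate NumParameters + 1 entries and append the new
      // UAV parameter as before.
    }
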
Note: the test is not a file-check style because dxil-defined subobjects don't get rehydrated into the DxilModule when the output of dxc.exe is piped into the input of opt.exe, meaning that the broken case can't be exercised. --- lib/DxilPIXPasses/PixPassHelpers.cpp | 12 +++++++ tools/clang/unittests/HLSL/PixTest.cpp | 40 ++++++++++++++++++++- tools/clang/unittests/HLSL/PixTestUtils.cpp | 2 +- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index dfb4b3aa83..69385ae048 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -199,6 +199,18 @@ constexpr uint32_t toolsUAVRegister = 0; template void ExtendRootSig(RootSigDesc &rootSigDesc) { auto *existingParams = rootSigDesc.pParameters; + for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) { + if (rootSigDesc.pParameters[i].ParameterType == + DxilRootParameterType::UAV) { + if (rootSigDesc.pParameters[i].Descriptor.RegisterSpace == + toolsRegisterSpace && + rootSigDesc.pParameters[i].Descriptor.ShaderRegister == + toolsUAVRegister) { + // Already added + return; + } + } + } auto *newParams = new RootParameterDesc[rootSigDesc.NumParameters + 1]; if (existingParams != nullptr) { memcpy(newParams, existingParams, diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index bb81c1c953..b97aa70c05 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -146,6 +146,7 @@ class PixTest : public ::testing::Test { TEST_METHOD(RootSignatureUpgrade_Annotation) TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest) + TEST_METHOD(DxilPIXDXRInvocationsLog_EmbeddedRootSigs) TEST_METHOD(DebugInstrumentation_TextOutput) TEST_METHOD(DebugInstrumentation_BlockReport) @@ -660,7 +661,7 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { CComPtr pOptimizedModule; CComPtr pText; VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( - dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + blob, Options.data(), Options.size(), &pOptimizedModule, &pText)); std::string outputText; if (pText->GetBufferSize() != 0) { @@ -2945,6 +2946,43 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +TEST_F(PixTest, DxilPIXDXRInvocationsLog_EmbeddedRootSigs) { + + const char *source = R"x( + +GlobalRootSignature grs = {"CBV(b0)"}; +struct MyPayload +{ + float4 color; +}; + +[shader("raygeneration")] +void MyRayGen() +{ +} + +[shader("closesthit")] +void MyClosestHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("anyhit")] +void MyAnyHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("miss")] +void MyMiss(inout MyPayload payload) +{ +} + +)x"; + + auto compiledLib = Compile(m_dllSupport, source, L"lib_6_3", + {L"-Qstrip_reflect"}, L"RootSig"); + RunDxilPIXDXRInvocationsLog(compiledLib); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/tools/clang/unittests/HLSL/PixTestUtils.cpp b/tools/clang/unittests/HLSL/PixTestUtils.cpp index 91b6c4479c..61647ff5fa 100644 --- a/tools/clang/unittests/HLSL/PixTestUtils.cpp +++ b/tools/clang/unittests/HLSL/PixTestUtils.cpp @@ -397,7 +397,7 @@ CComPtr Compile(dxc::DxcDllSupport &dllSupport, const char *hlsl, CheckOperationSucceeded(pResult, &pProgram); CComPtr pLib; - VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); + 
VERIFY_SUCCEEDED(dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike( pProgram->GetBufferPointer(), pProgram->GetBufferSize()); VERIFY_IS_NOT_NULL(pContainer); From 2f357a9d625eaaa982ce1fac513e5f77a7d81900 Mon Sep 17 00:00:00 2001 From: Antonio Maiorano Date: Tue, 1 Apr 2025 21:38:26 -0400 Subject: [PATCH 60/88] Fix assert due to unreachable discard (#7289) When emitting discard in an unreachable code context (e.g. after an infinite loop), DXC would assert (if asserts enabled), or trigger a UBSAN failure because the discard instruction would have no parent. When an infinite loop is emitted during CodeGen, the InsertPt is cleared, thus subsequent discard instructions would be created, but no parent set. We skip emitting discard in this case, which follows the same pattern as is done for EmitIfStmt, and EmitSwitchStmt. --- tools/clang/lib/CodeGen/CGStmt.cpp | 4 ++++ .../FinishCodeGen/unreachable-discard.hlsl | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 080d824022..340550dbdd 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -525,6 +525,10 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { // HLSL Change Begins. void CodeGenFunction::EmitDiscardStmt(const DiscardStmt &S) { + // Skip unreachable discard. + if (!HaveInsertPoint()) + return; + CGM.getHLSLRuntime().EmitHLSLDiscard(*this); } // HLSL Change Ends. diff --git a/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl new file mode 100644 index 0000000000..77c0f51911 --- /dev/null +++ b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc /T ps_6_5 -fcgl %s | FileCheck %s + +// Compiling this HLSL would trigger an assertion: +// While deleting: void (i32, float)* %dx.hl.op..void (i32, float) +// Use still stuck around after Def is destroyed: call void @"dx.hl.op..void (i32, float)"(i32 120, float -1.000000e+00), !dbg <0x503000001cc8> +// Error: assert(use_empty() && "Uses remain when a value is destroyed!") +// File: /src/external/DirectXShaderCompiler/lib/IR/Value.cpp(83) +// +// Bug was fixed in CodeGenFunction::EmitDiscardStmt by skipping the emission of +// an unreachable discard. + +// CHECK: define void @main() +// CHECK: br label % +// CHECK-NOT: call void @"dx.hl.op..void (i32, float)" +// CHECK: ret void + +void main() { + while (true) { + } + discard; +} From 572aef579dc90cb8de5df254ed3e7225c2c8a30e Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 1 Apr 2025 22:18:50 -0500 Subject: [PATCH 61/88] Disable code owners in main (#7298) MS just changed policy to enforce code owners across the whole enterprise, which is _not_ what we want. So we need to disable this in main for the time being. 
--- .github/CODEOWNERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 01ad1577b7..6cbdeb6270 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,2 @@ -* @microsoft/hlsl-release +# Uncomment the next line in release branches after ask-mode begins +# * @microsoft/hlsl-release From 9eb71198c9425ee77178e081e5188659ee2cf02c Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Wed, 2 Apr 2025 05:04:38 -0600 Subject: [PATCH 62/88] [SPIRV] Implements vk::BufferPointer proposal (#7163) Implements [vk::BufferPointer proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0010-vk-buffer-ref.md). Closes #6489. --- include/dxc/HlslIntrinsicOp.h | 5 +- include/dxc/dxcapi.internal.h | 8 +- lib/HLSL/HLOperationLower.cpp | 9 ++ tools/clang/include/clang/AST/HlslTypes.h | 33 ++++- .../clang/include/clang/AST/OperationKinds.h | 5 + tools/clang/include/clang/Basic/Attr.td | 17 +++ .../clang/Basic/DiagnosticSemaKinds.td | 9 +- .../clang/include/clang/SPIRV/SpirvBuilder.h | 11 ++ .../clang/include/clang/SPIRV/SpirvContext.h | 12 ++ .../include/clang/SPIRV/SpirvInstruction.h | 52 +++++++ tools/clang/include/clang/SPIRV/SpirvType.h | 24 ++++ .../clang/include/clang/SPIRV/SpirvVisitor.h | 6 + tools/clang/lib/AST/ASTContextHLSL.cpp | 79 +++++++++++ tools/clang/lib/AST/Expr.cpp | 9 +- tools/clang/lib/AST/ExprConstant.cpp | 9 ++ tools/clang/lib/AST/HlslTypes.cpp | 47 +++++++ tools/clang/lib/Lex/PPMacroExpansion.cpp | 7 +- .../lib/SPIRV/AlignmentSizeCalculator.cpp | 25 ++-- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 9 +- tools/clang/lib/SPIRV/EmitVisitor.cpp | 44 +++++- tools/clang/lib/SPIRV/EmitVisitor.h | 25 ++-- tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 33 ++++- tools/clang/lib/SPIRV/LowerTypeVisitor.h | 4 + tools/clang/lib/SPIRV/SpirvBuilder.cpp | 37 +++++ tools/clang/lib/SPIRV/SpirvContext.cpp | 26 ++++ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 133 +++++++++++++++++- tools/clang/lib/SPIRV/SpirvEmitter.h | 13 ++ tools/clang/lib/SPIRV/SpirvInstruction.cpp | 28 ++++ tools/clang/lib/Sema/SemaCast.cpp | 17 +++ tools/clang/lib/Sema/SemaExprCXX.cpp | 28 ++++ tools/clang/lib/Sema/SemaHLSL.cpp | 111 ++++++++++++++- .../vk.buffer-pointer.alias.cs.hlsl | 28 ++++ .../CodeGenSPIRV/vk.buffer-pointer.alias.hlsl | 72 ++++++++++ .../vk.buffer-pointer.atomic.hlsl | 39 +++++ .../vk.buffer-pointer.error1.hlsl | 19 +++ .../vk.buffer-pointer.error2.hlsl | 19 +++ .../vk.buffer-pointer.error3.hlsl | 19 +++ .../vk.buffer-pointer.error4.hlsl | 18 +++ .../vk.buffer-pointer.error5.hlsl | 26 ++++ .../vk.buffer-pointer.error6.hlsl | 23 +++ .../vk.buffer-pointer.linked-list.hlsl | 101 +++++++++++++ .../CodeGenSPIRV/vk.buffer-pointer.read.hlsl | 48 +++++++ .../CodeGenSPIRV/vk.buffer-pointer.write.hlsl | 52 +++++++ utils/hct/gen_intrin_main.txt | 10 +- utils/hct/hctdb.py | 12 +- utils/hct/hlsl_intrinsic_opcodes.json | 7 +- 46 files changed, 1326 insertions(+), 42 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl create mode 100644 
tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 90f3fafd79..68b88822e8 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -231,6 +231,9 @@ enum class IntrinsicOp { IOP_VkReadClock = 223, IOP_Vkext_execution_mode = 224, IOP_Vkext_execution_mode_id = 225, + IOP_Vkreinterpret_pointer_cast = 360, + IOP_Vkstatic_pointer_cast = 361, + MOP_GetBufferContents = 362, MOP_Append = 226, MOP_RestartStrip = 227, MOP_CalculateLevelOfDetail = 228, @@ -366,7 +369,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 360, + Num_Intrinsics = 363, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index bf8a040673..f183bb6cf0 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,6 +7,9 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ @@ -35,6 +38,7 @@ typedef struct ID3D10Blob ID3D10Blob; static const BYTE INTRIN_TEMPLATE_FROM_TYPE = 0xff; static const BYTE INTRIN_TEMPLATE_VARARGS = 0xfe; static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION = 0xfd; +static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION_2 = 0xfc; // Use this enumeration to describe allowed templates (layouts) in intrinsics. enum LEGAL_INTRINSIC_TEMPLATES { @@ -128,7 +132,9 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, - LICOMPTYPE_COUNT = 52 + LICOMPTYPE_VK_BUFFER_POINTER = 52, + + LICOMPTYPE_COUNT = 53 }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 3ab1f9fdec..445dbcc879 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,6 +7,9 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. 
// +// // /////////////////////////////////////////////////////////////////////////////// #define _USE_MATH_DEFINES @@ -6818,6 +6821,12 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil // opcode + {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, }; } // namespace static_assert( diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 3b517576fe..ab29e4bde7 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /// /// \file // /// \brief Defines the HLSL type system interface. // @@ -31,6 +34,7 @@ namespace clang { class ASTContext; class AttributeList; +class CXXConstructorDecl; class CXXMethodDecl; class CXXRecordDecl; class ClassTemplateDecl; @@ -402,6 +406,10 @@ DeclareNodeOrRecordType(clang::ASTContext &Ctx, DXIL::NodeIOKind Type, bool IsCompleteType = false); #ifdef ENABLE_SPIRV_CODEGEN +clang::CXXRecordDecl * +DeclareVkBufferPointerType(clang::ASTContext &context, + clang::DeclContext *declContext); + clang::CXXRecordDecl *DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, @@ -427,7 +435,7 @@ clang::VarDecl *DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, /// method. AST context in which to /// work. Class in which the function template /// is declared. Function for which a -/// template is created. Declarations for templates to the /// function. Count of /// template declarations. A new function template declaration @@ -533,6 +541,29 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, clang::QualType paramType); bool IsPatchConstantFunctionDecl(const clang::FunctionDecl *FD); +#ifdef ENABLE_SPIRV_CODEGEN +bool IsVKBufferPointerType(clang::QualType type); +clang::QualType GetVKBufferPointerBufferType(clang::QualType type); +unsigned GetVKBufferPointerAlignment(clang::QualType type); +#endif + +/// Adds a constructor declaration to the specified class +/// record. ASTContext that owns +/// declarations. Record declaration in which +/// to add constructor. Result type for +/// constructor. Types for constructor +/// parameters. Names for constructor +/// parameters. Name for +/// constructor. Whether the constructor is a +/// const function. The method declaration for the +/// constructor. +clang::CXXConstructorDecl *CreateConstructorDeclarationWithParams( + clang::ASTContext &context, clang::CXXRecordDecl *recordDecl, + clang::QualType resultType, llvm::ArrayRef paramTypes, + llvm::ArrayRef paramNames, + clang::DeclarationName declarationName, bool isConst, + bool isTemplateFunction = false); + /// Adds a function declaration to the specified class /// record. ASTContext that owns /// declarations. 
Record declaration in which diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 75e665a5e9..3909c8b5e8 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be @@ -321,6 +324,8 @@ enum CastKind { CK_HLSLCC_FloatingToIntegral, CK_HLSLCC_FloatingToBoolean, CK_HLSLCC_FloatingCast, + CK_VK_BufferPointerToIntegral, + CK_VK_IntegralToBufferPointer, // HLSL Change - Made CK_Invalid an enum case because otherwise it is UB to // assign it to a value of CastKind. diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 7a009aa7e1..9c117fb3ce 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// class DocumentationCategory { @@ -1447,6 +1450,20 @@ def VKStorageClassExt : InheritableAttr { let Documentation = [Undocumented]; } +def VKBufferPointer : InheritableAttr { + let Spellings = [CXX11<"", "hlsl_vk_buffer_pointer", 2021>]; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + +def VKAliasedPointer : InheritableAttr { + let Spellings = [CXX11<"vk", "aliased_pointer">]; + let Subjects = SubjectList<[Var, ParmVar], ErrorDiag>; + let Args = []; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + // Global variables that are of struct type def StructGlobalVar : SubsetSubjecthasGlobalStorage() && S->getType()->isStructureType()}]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6ae59cac14..4f4dc28a4c 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -7838,7 +7841,7 @@ def warn_hlsl_intrinsic_in_wrong_shader_model : Warning< "intrinsic %0 potentially used by '%1' requires shader model %2 or greater">, DefaultError, InGroup; def warn_hlsl_intrinsic_overload_in_wrong_shader_model : Warning< - "overload of intrinsic %0 requires shader model %1 or greater">, + "overload of intrinsic %0 requires shader model %1 or greater">, DefaultError, InGroup; def err_hlsl_intrinsic_template_arg_unsupported: Error< "Explicit template arguments on intrinsic %0 are not supported">; @@ -8004,6 +8007,10 @@ def err_hlsl_hitobject_unsupported_stage : Error< // SPIRV Change Starts def err_hlsl_vulkan_specific_feature: Error<"%0 is a Vulkan specific feature">; +def err_hlsl_vk_pointer_cast_alignment: Error< + "Vulkan buffer pointer cannot be cast to greater alignment">; +def err_hlsl_vk_static_pointer_cast_type: Error< + "vk::static_pointer_cast() content type must be base class of argument's content type">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index f03735115b..ed2cb3b6fd 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H @@ -273,6 +276,14 @@ class SpirvBuilder { SpirvInstruction *sample, SourceLocation); + /// \brief Creates an OpConverPtrToU SPIR-V instruction with the given + /// parameters. + SpirvConvertPtrToU *createConvertPtrToU(SpirvInstruction *ptr, QualType type); + + /// \brief Creates an OpConverUToPtr SPIR-V instruction with the given + /// parameters. + SpirvConvertUToPtr *createConvertUToPtr(SpirvInstruction *val, QualType type); + /// \brief Creates SPIR-V instructions for sampling the given image. /// /// If compareVal is given a non-zero value, *Dref* variants of OpImageSample* diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index e65097bedb..c18c139642 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H @@ -317,6 +320,13 @@ class SpirvContext { const HybridPointerType *getPointerType(QualType pointee, spv::StorageClass); + const ForwardPointerType *getForwardPointerType(QualType pointee); + + const SpirvPointerType *getForwardReference(QualType type); + + void registerForwardReference(QualType type, + const SpirvPointerType *pointerType); + /// Generates (or reuses an existing) OpString for the given string literal. 
SpirvString *getSpirvString(llvm::StringRef str); @@ -478,6 +488,8 @@ class SpirvContext { llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; llvm::SmallVector hybridPointerTypes; + llvm::MapVector forwardPointerTypes; + llvm::MapVector forwardReferences; llvm::DenseSet functionTypes; llvm::DenseMap spirvIntrinsicTypesById; llvm::SmallVector spirvIntrinsicTypes; diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7ec1375bde..7a7ad3aa4d 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H @@ -67,6 +71,10 @@ class SpirvInstruction { IK_ConstantComposite, IK_ConstantNull, + // Pointer <-> uint conversions. + IK_ConvertPtrToU, + IK_ConvertUToPtr, + // OpUndef IK_Undef, @@ -1306,6 +1314,50 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConvertPtrToU : public SpirvInstruction { +public: + SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertPtrToU) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertPtrToU; + } + + bool operator==(const SpirvConvertPtrToU &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPtr() const { return ptr; } + +private: + SpirvInstruction *ptr; +}; + +class SpirvConvertUToPtr : public SpirvInstruction { +public: + SpirvConvertUToPtr(SpirvInstruction *intValue, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertUToPtr) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertUToPtr; + } + + bool operator==(const SpirvConvertUToPtr &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getVal() const { return val; } + +private: + SpirvInstruction *val; +}; + class SpirvUndef : public SpirvInstruction { public: SpirvUndef(QualType type); diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 221f01e5ff..00a00ef238 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H @@ -53,6 +56,7 @@ class SpirvType { TK_RuntimeArray, TK_Struct, TK_Pointer, + TK_ForwardPointer, TK_Function, TK_AccelerationStructureNV, TK_RayQueryKHR, @@ -387,6 +391,26 @@ class SpirvPointerType : public SpirvType { spv::StorageClass storageClass; }; +/// Represents a SPIR-V forwarding pointer type. 
+class ForwardPointerType : public SpirvType { +public: + ForwardPointerType(QualType pointee) + : SpirvType(TK_ForwardPointer), pointeeType(pointee) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_ForwardPointer; + } + + const QualType getPointeeType() const { return pointeeType; } + + bool operator==(const ForwardPointerType &that) const { + return pointeeType == that.pointeeType; + } + +private: + const QualType pointeeType; +}; + /// Represents a SPIR-V function type. None of the parameters nor the return /// type is allowed to be a hybrid type. class FunctionType : public SpirvType { diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 303a4600a1..93682518a1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H @@ -89,6 +93,8 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) DEFINE_VISIT_METHOD(SpirvConstantNull) + DEFINE_VISIT_METHOD(SpirvConvertPtrToU) + DEFINE_VISIT_METHOD(SpirvConvertUToPtr) DEFINE_VISIT_METHOD(SpirvUndef) DEFINE_VISIT_METHOD(SpirvCompositeConstruct) DEFINE_VISIT_METHOD(SpirvCompositeExtract) diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 3748f8f8f8..c7a031a219 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the ASTContext interface for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -1072,6 +1075,47 @@ static void CreateConstructorDeclaration( (*constructorDecl)->setAccess(AccessSpecifier::AS_public); } +CXXConstructorDecl *hlsl::CreateConstructorDeclarationWithParams( + ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, + ArrayRef paramTypes, ArrayRef paramNames, + DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DXASSERT_NOMSG(recordDecl != nullptr); + DXASSERT_NOMSG(!resultType.isNull()); + DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); + + TypeSourceInfo *tinfo; + CXXConstructorDecl *constructorDecl; + CreateConstructorDeclaration(context, recordDecl, resultType, paramTypes, + declarationName, isConst, &constructorDecl, + &tinfo); + + // Create and associate parameters to constructor. 
+ SmallVector parmVarDecls; + if (!paramTypes.empty()) { + for (unsigned int i = 0; i < paramTypes.size(); ++i) { + IdentifierInfo *argIi = &context.Idents.get(paramNames[i]); + ParmVarDecl *parmVarDecl = ParmVarDecl::Create( + context, constructorDecl, NoLoc, NoLoc, argIi, paramTypes[i], + context.getTrivialTypeSourceInfo(paramTypes[i], NoLoc), + StorageClass::SC_None, nullptr); + parmVarDecl->setScopeInfo(0, i); + DXASSERT(parmVarDecl->getFunctionScopeIndex() == i, + "otherwise failed to set correct index"); + parmVarDecls.push_back(parmVarDecl); + } + constructorDecl->setParams(ArrayRef(parmVarDecls)); + AssociateParametersToFunctionPrototype(tinfo, &parmVarDecls.front(), + parmVarDecls.size()); + } + + // If this is going to be part of a template function decl, don't add it to + // the record because the template function decl will be added instead. + if (!isTemplateFunction) + recordDecl->addDecl(constructorDecl); + + return constructorDecl; +} + static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, @@ -1324,6 +1368,41 @@ CXXRecordDecl *hlsl::DeclareNodeOrRecordType( } #ifdef ENABLE_SPIRV_CODEGEN +CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, + DeclContext *declContext) { + BuiltinTypeDeclBuilder Builder(declContext, "BufferPointer", + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *TyParamDecl = + Builder.addTypeTemplateParam("recordtype"); + Builder.addIntegerTemplateParam("alignment", context.UnsignedIntTy, 0); + + Builder.startDefinition(); + + QualType paramType = QualType(TyParamDecl->getTypeForDecl(), 0); + CXXRecordDecl *recordDecl = Builder.getRecordDecl(); + + CXXMethodDecl *methodDecl = CreateObjectFunctionDeclarationWithParams( + context, recordDecl, context.getLValueReferenceType(paramType), {}, {}, + DeclarationName(&context.Idents.get("Get")), true); + CanQualType canQualType = + recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, + {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + + StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); + unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); + methodDecl->addAttr( + HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); + methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + + return Builder.completeDefinition(); +} + CXXRecordDecl *hlsl::DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index 0e2ec8c6c2..c6dc21217e 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. 
@@ -1716,7 +1719,11 @@ const char *CastExpr::getCastKindName() const { return "HLSLCC_FloatingToBoolean"; case CK_HLSLCC_FloatingCast: return "HLSLCC_FloatingCast"; - // HLSL Change Ends + case CK_VK_BufferPointerToIntegral: + return "VK_BufferPointerToIntegral"; + case CK_VK_IntegralToBufferPointer: + return "VK_IntegralToBufferPointer"; + // HLSL Change Ends } llvm_unreachable("Unhandled cast kind!"); diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 5e8d4700bd..69e0760bce 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. @@ -7829,6 +7832,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return false; return Success(Value, E); } + + // HLSL Change Starts + case CK_VK_BufferPointerToIntegral: { + return false; + // HLSL Change Ends + } } llvm_unreachable("unknown cast resulting in integral value"); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index eaf8273413..d853125954 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,6 +5,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. // // /// /// \file // @@ -734,6 +737,50 @@ bool IsHLSLRayQueryType(clang::QualType type) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static llvm::Optional> +MaybeGetVKBufferPointerParams(clang::QualType type) { + const RecordType *RT = dyn_cast(type.getCanonicalType()); + if (!RT) + return llvm::None; + + const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getAsCXXRecordDecl()); + if (!templateDecl || !templateDecl->getName().equals("BufferPointer")) + return llvm::None; + + auto *namespaceDecl = + dyn_cast_or_null(templateDecl->getDeclContext()); + if (!namespaceDecl || !namespaceDecl->getName().equals("vk")) + return llvm::None; + + const TemplateArgumentList &argList = templateDecl->getTemplateArgs(); + QualType bufferType = argList[0].getAsType(); + unsigned align = + argList.size() > 1 ? 
argList[1].getAsIntegral().getLimitedValue() : 0; + return std::make_pair(bufferType, align); +} + +bool IsVKBufferPointerType(clang::QualType type) { + return MaybeGetVKBufferPointerParams(type).hasValue(); +} + +QualType GetVKBufferPointerBufferType(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert(bpParams.hasValue() && + "cannot get pointer type for type that is not a vk::BufferPointer"); + return bpParams.getValue().first; +} + +unsigned GetVKBufferPointerAlignment(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert( + bpParams.hasValue() && + "cannot get pointer alignment for type that is not a vk::BufferPointer"); + return bpParams.getValue().second; +} +#endif + QualType GetHLSLResourceResultType(QualType type) { // Don't canonicalize the type as to not lose snorm in Buffer const RecordType *RT = type->getAs(); diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index 64ce8c9182..ebfb93df2e 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the @@ -1080,7 +1083,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("nullability", true) .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory)) .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread)) - .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow)) + .Case("dataflow_sanitizer", + LangOpts.Sanitize.has(SanitizerKind::DataFlow)) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? .Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -1180,6 +1184,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("has_trivial_constructor", LangOpts.CPlusPlus) .Case("has_trivial_destructor", LangOpts.CPlusPlus) .Case("has_virtual_destructor", LangOpts.CPlusPlus) + .Case("hlsl_vk_buffer_pointer", LangOpts.SPIRV) .Case("is_abstract", LangOpts.CPlusPlus) .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index 492640c493..db140f4766 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" @@ -277,14 +280,20 @@ std::pair AlignmentSizeCalculator::getAlignmentAndSize( if (recordType != nullptr) { const llvm::StringRef name = recordType->getDecl()->getName(); - if (isTypeInVkNamespace(recordType) && name == "SpirvType") { - const ClassTemplateSpecializationDecl *templateDecl = - cast(recordType->getDecl()); - const uint64_t size = - templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); - const uint64_t alignment = - templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); - return {alignment, size}; + if (isTypeInVkNamespace(recordType)) { + if (name == "BufferPointer") { + return {8, 8}; // same as uint64_t + } + + if (name == "SpirvType") { + const ClassTemplateSpecializationDecl *templateDecl = + cast(recordType->getDecl()); + const uint64_t size = + templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); + const uint64_t alignment = + templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); + return {alignment, size}; + } } } diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index c2b5acff53..6fd0c6d950 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" @@ -200,8 +203,10 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } // Pointer type else if (const auto *ptrType = dyn_cast(type)) { - addCapabilityForType(ptrType->getPointeeType(), loc, sc); - if (sc == spv::StorageClass::PhysicalStorageBuffer) { + addCapabilityForType(ptrType->getPointeeType(), loc, + ptrType->getStorageClass()); + if (ptrType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer) { addExtension(Extension::KHR_physical_storage_buffer, "SPV_KHR_physical_storage_buffer", loc); addCapability(spv::Capability::PhysicalStorageBufferAddresses); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 6f6f5f88cd..9c0368f7a1 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. 
@@ -488,6 +491,7 @@ std::vector EmitVisitor::takeBinary() { debugVariableBinary.end()); result.insert(result.end(), annotationsBinary.begin(), annotationsBinary.end()); + result.insert(result.end(), fwdDeclBinary.begin(), fwdDeclBinary.end()); result.insert(result.end(), typeConstantBinary.begin(), typeConstantBinary.end()); result.insert(result.end(), globalVarsBinary.begin(), globalVarsBinary.end()); @@ -1016,6 +1020,28 @@ bool EmitVisitor::visit(SpirvConstantNull *inst) { return true; } +bool EmitVisitor::visit(SpirvConvertPtrToU *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPtr())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvConvertUToPtr *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getVal())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvUndef *inst) { typeHandler.getOrCreateUndef(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -2012,10 +2038,11 @@ void EmitTypeHandler::initTypeInstruction(spv::Op op) { curTypeInst.push_back(static_cast(op)); } -void EmitTypeHandler::finalizeTypeInstruction() { +void EmitTypeHandler::finalizeTypeInstruction(bool isFwdDecl) { curTypeInst[0] |= static_cast(curTypeInst.size()) << 16; - typeConstantBinary->insert(typeConstantBinary->end(), curTypeInst.begin(), - curTypeInst.end()); + auto binarySection = isFwdDecl ? fwdDeclBinary : typeConstantBinary; + binarySection->insert(binarySection->end(), curTypeInst.begin(), + curTypeInst.end()); } uint32_t EmitTypeHandler::getResultIdForType(const SpirvType *type, @@ -2594,6 +2621,17 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { curTypeInst.push_back(pointeeType); finalizeTypeInstruction(); } + // Forward pointer types + else if (const auto *fwdPtrType = dyn_cast(type)) { + const SpirvPointerType *ptrType = + context.getForwardReference(fwdPtrType->getPointeeType()); + const uint32_t refId = emitType(ptrType); + initTypeInstruction(spv::Op::OpTypeForwardPointer); + curTypeInst.push_back(refId); + curTypeInst.push_back(static_cast(ptrType->getStorageClass())); + finalizeTypeInstruction(true); + return refId; + } // Function types else if (const auto *fnType = dyn_cast(type)) { const uint32_t retTypeId = emitType(fnType->getReturnType()); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 2f5d99b89d..1f9b0939e6 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H @@ -49,15 +53,15 @@ class EmitTypeHandler { EmitTypeHandler(ASTContext &astCtx, SpirvContext &spvContext, const SpirvCodeGenOptions &opts, FeatureManager &featureMgr, std::vector *debugVec, - std::vector *decVec, + std::vector *decVec, std::vector *fwdVec, std::vector *typesVec, const std::function &takeNextIdFn) : astContext(astCtx), context(spvContext), featureManager(featureMgr), debugVariableBinary(debugVec), annotationsBinary(decVec), - typeConstantBinary(typesVec), takeNextIdFunction(takeNextIdFn), - emittedConstantInts({}), emittedConstantFloats({}), - emittedConstantComposites({}), emittedConstantNulls({}), - emittedUndef({}), emittedConstantBools() { + fwdDeclBinary(fwdVec), typeConstantBinary(typesVec), + takeNextIdFunction(takeNextIdFn), emittedConstantInts({}), + emittedConstantFloats({}), emittedConstantComposites({}), + emittedConstantNulls({}), emittedUndef({}), emittedConstantBools() { assert(decVec); assert(typesVec); } @@ -120,7 +124,7 @@ class EmitTypeHandler { private: void initTypeInstruction(spv::Op op); - void finalizeTypeInstruction(); + void finalizeTypeInstruction(bool isFwdDecl = false); // Returns the result-id for the given type and decorations. If a type with // the same decorations have already been used, it returns the existing @@ -161,6 +165,7 @@ class EmitTypeHandler { std::vector curDecorationInst; std::vector *debugVariableBinary; std::vector *annotationsBinary; + std::vector *fwdDeclBinary; std::vector *typeConstantBinary; std::function takeNextIdFunction; @@ -207,7 +212,7 @@ class EmitVisitor : public Visitor { : Visitor(opts, spvCtx), astContext(astCtx), featureManager(featureMgr), id(0), typeHandler(astCtx, spvCtx, opts, featureMgr, &debugVariableBinary, - &annotationsBinary, &typeConstantBinary, + &annotationsBinary, &fwdDeclBinary, &typeConstantBinary, [this]() -> uint32_t { return takeNextId(); }), debugMainFileId(0), debugInfoExtInstId(0), debugLineStart(0), debugLineEnd(0), debugColumnStart(0), debugColumnEnd(0), @@ -254,6 +259,8 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; bool visit(SpirvConstantNull *) override; + bool visit(SpirvConvertPtrToU *) override; + bool visit(SpirvConvertUToPtr *) override; bool visit(SpirvUndef *) override; bool visit(SpirvCompositeConstruct *) override; bool visit(SpirvCompositeExtract *) override; @@ -438,7 +445,9 @@ class EmitVisitor : public Visitor { // All annotation instructions: OpDecorate, OpMemberDecorate, OpGroupDecorate, // OpGroupMemberDecorate, and OpDecorationGroup. std::vector annotationsBinary; - // All type and constant instructions + // All forward pointer type declaration instructions + std::vector fwdDeclBinary; + // All other type and constant instructions std::vector typeConstantBinary; // All global variable declarations (all OpVariable instructions whose Storage // Class is not Function) diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index a5bc4a4aa8..b31d19b5d8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" @@ -549,7 +552,9 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type, // checking the general struct type. if (const auto *spvType = lowerResourceType(type, rule, isRowMajor, srcLoc)) { - spvContext.registerStructDeclForSpirvType(spvType, decl); + if (!isa(spvType)) { + spvContext.registerStructDeclForSpirvType(spvType, decl); + } return spvType; } @@ -809,6 +814,32 @@ const SpirvType *LowerTypeVisitor::lowerVkTypeInVkNamespace( QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); return lowerType(realType, rule, llvm::None, srcLoc); } + if (name == "BufferPointer") { + const size_t visitedTypeStackSize = visitedTypeStack.size(); + (void)visitedTypeStackSize; // suppress unused warning (used only in assert) + + for (QualType t : visitedTypeStack) { + if (t == type) { + return spvContext.getForwardPointerType(type); + } + } + + QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); + if (rule == SpirvLayoutRule::Void) { + rule = spvOptions.sBufferLayoutRule; + } + visitedTypeStack.push_back(type); + + const SpirvType *spirvType = lowerType(realType, rule, llvm::None, srcLoc); + const auto *pointerType = spvContext.getPointerType( + spirvType, spv::StorageClass::PhysicalStorageBuffer); + spvContext.registerForwardReference(type, pointerType); + + assert(visitedTypeStack.back() == type); + visitedTypeStack.pop_back(); + assert(visitedTypeStack.size() == visitedTypeStackSize); + return pointerType; + } emitError("unknown type %0 in vk namespace", srcLoc) << type; return nullptr; } diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 96235d1508..5b26b67e3a 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H @@ -137,6 +140,7 @@ class LowerTypeVisitor : public Visitor { AlignmentSizeCalculator alignmentCalc; /// alignment calculator bool useArrayForMat1xN; /// SPIR-V array for HLSL Matrix 1xN SpirvBuilder &spvBuilder; + SmallVector visitedTypeStack; // for type recursion detection }; } // end namespace spirv diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 1275e2b252..6b3f43fc77 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" @@ -202,6 +205,14 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setLayoutRule(pointer->getLayoutRule()); instruction->setRValue(true); + if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + instruction->setAlignment(align); + } + if (pointer->containsAliasComponent() && isAKindOfStructuredOrByteBuffer(resultType)) { instruction->setStorageClass(spv::StorageClass::Uniform); @@ -300,6 +311,16 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, new (context) SpirvStore(loc, address, source, llvm::None, range); insertPoint->addInstruction(instruction); + if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && + address->getAstResultType() != QualType()) { // exclude raw buffer + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + address->getAstResultType(), address->getLayoutRule(), llvm::None, + &stride); + instruction->setAlignment(align); + } + if (address->isRasterizerOrdered()) { createEndInvocationInterlockEXT(loc, range); } @@ -491,6 +512,22 @@ SpirvImageTexelPointer *SpirvBuilder::createImageTexelPointer( return instruction; } +SpirvConvertPtrToU *SpirvBuilder::createConvertPtrToU(SpirvInstruction *ptr, + QualType type) { + auto *instruction = new (context) SpirvConvertPtrToU(ptr, type); + instruction->setRValue(true); + insertPoint->addInstruction(instruction); + return instruction; +} + +SpirvConvertUToPtr *SpirvBuilder::createConvertUToPtr(SpirvInstruction *val, + QualType type) { + auto *instruction = new (context) SpirvConvertUToPtr(val, type); + instruction->setRValue(false); + insertPoint->addInstruction(instruction); + return instruction; +} + spv::ImageOperandsMask SpirvBuilder::composeImageOperandsMask( SpirvInstruction *bias, SpirvInstruction *lod, const std::pair &grad, diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 6af36eb691..47dfc67433 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include @@ -328,6 +331,29 @@ const HybridPointerType *SpirvContext::getPointerType(QualType pointee, return result; } +const ForwardPointerType * +SpirvContext::getForwardPointerType(QualType pointee) { + assert(hlsl::IsVKBufferPointerType(pointee)); + + auto foundPointee = forwardPointerTypes.find(pointee); + if (foundPointee != forwardPointerTypes.end()) { + return foundPointee->second; + } + + return forwardPointerTypes[pointee] = new (this) ForwardPointerType(pointee); +} + +const SpirvPointerType *SpirvContext::getForwardReference(QualType type) { + return forwardReferences[type]; +} + +void SpirvContext::registerForwardReference( + QualType type, const SpirvPointerType *pointerType) { + assert(pointerType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer); + forwardReferences[type] = pointerType; +} + FunctionType * SpirvContext::getFunctionType(const SpirvType *ret, llvm::ArrayRef param) { diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 579af04ea6..7cc84fa2fc 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits @@ -1233,12 +1237,17 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, } else if (isa(expr)) { assert(curThis); result = curThis; - } else if (isa(expr)) { + } else if (const auto *constructExpr = dyn_cast(expr)) { // For RayQuery type, we should not explicitly initialize it using // CXXConstructExpr e.g., RayQuery<0> r = RayQuery<0>() is the same as we do // not have a variable initialization. Setting nullptr for the SPIR-V // instruction used for expr will let us skip the variable initialization. - if (!hlsl::IsHLSLRayQueryType(expr->getType())) + if (hlsl::IsVKBufferPointerType(expr->getType())) { + const Expr *arg = constructExpr->getArg(0); + SpirvInstruction *value = loadIfGLValue(arg, arg->getSourceRange()); + result = spvBuilder.createConvertUToPtr(value, expr->getType()); + result->setRValue(); + } else if (!hlsl::IsHLSLRayQueryType(expr->getType())) result = curThis; } else if (const auto *unaryExpr = dyn_cast(expr)) { result = doUnaryExprOrTypeTraitExpr(unaryExpr); @@ -1543,7 +1552,23 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Create all parameters. for (uint32_t i = 0; i < decl->getNumParams(); ++i) { const ParmVarDecl *paramDecl = decl->getParamDecl(i); - (void)declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + QualType paramType = paramDecl->getType(); + auto *param = + declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(paramType)) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(paramType, &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + param, + static_cast(paramDecl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + } +#endif } if (decl->hasBody()) { @@ -1644,6 +1669,15 @@ bool SpirvEmitter::validateVKAttributes(const NamedDecl *decl) { loc); success = false; } + +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(cast(decl)->getType())) { + emitError("vk::push_constant attribute cannot be used on declarations " + "with vk::BufferPointer type", + loc); + success = false; + } +#endif } // vk::shader_record_nv is supported only on cbuffer/ConstantBuffer @@ -1951,6 +1985,11 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { return; } + if (hlsl::IsVKBufferPointerType(decl->getType()) && !decl->hasInit()) { + emitError("vk::BufferPointer has no default constructor", loc); + return; + } + // We can have VarDecls inside cbuffer/tbuffer. For those VarDecls, we need // to emit their cbuffer/tbuffer as a whole and access each individual one // using access chains. @@ -2037,10 +2076,24 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { needsLegalization = true; } - if (var != nullptr && decl->hasAttrs()) { - declIdMapper.decorateWithIntrinsicAttrs(decl, var); - if (auto attr = decl->getAttr()) { - var->setStorageClass(static_cast(attr->getStclass())); + if (var != nullptr) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(decl->getType(), &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + var, + static_cast(decl->hasAttr() + ? spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + + if (decl->hasAttrs()) { + declIdMapper.decorateWithIntrinsicAttrs(decl, var); + if (auto attr = decl->getAttr()) { + var->setStorageClass( + static_cast(attr->getStclass())); + } } } @@ -3665,6 +3718,12 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, } case CastKind::CK_ToVoid: return nullptr; + case CastKind::CK_VK_BufferPointerToIntegral: { + return spvBuilder.createConvertPtrToU(doExpr(subExpr, range), toType); + } + case CastKind::CK_VK_IntegralToBufferPointer: { + return spvBuilder.createConvertUToPtr(doExpr(subExpr, range), toType); + } default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); @@ -5442,6 +5501,8 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, case IntrinsicOp::MOP_WorldRayDirection: case IntrinsicOp::MOP_WorldRayOrigin: return processRayQueryIntrinsics(expr, opcode); + case IntrinsicOp::MOP_GetBufferContents: + return processIntrinsicGetBufferContents(expr); default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -7021,6 +7082,12 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, if (const auto *recordType = valType->getAs()) { assert(recordType->isStructureType()); + if (isTypeInVkNamespace(recordType) && + recordType->getDecl()->getName().equals("BufferPointer")) { + // Uniquely among structs, vk::BufferPointer lowers to a pointer type. 
+ return srcVal; + } + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, spvBuilder); const StructType *spirvStructType = @@ -9403,6 +9470,14 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_EvaluateAttributeSnapped: { retVal = processEvaluateAttributeAt(callExpr, hlslOpcode, srcLoc, srcRange); break; + } + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, false); + break; + } + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, true); + break; } INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); @@ -10782,6 +10857,50 @@ SpirvEmitter::processIntrinsicClamp(const CallExpr *callExpr) { loc, range); } +SpirvInstruction * +SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, + bool isStatic) { + const Expr *argExpr = callExpr->getArg(0); + SpirvInstruction *ptr = doExpr(argExpr); + QualType srcType = argExpr->getType(); + QualType destType = callExpr->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + return srcTypeArg == destTypeArg + ? ptr + : spvBuilder.createUnaryOp(spv::Op::OpBitcast, destType, ptr, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( + const CXXMemberCallExpr *callExpr) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + Expr *obj = callExpr->getImplicitObjectArgument(); + SpirvInstruction *bufferPointer = doExpr(obj); + if (!bufferPointer) + return nullptr; + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); + lowerTypeVisitor.visitInstruction(bufferPointer); + + const SpirvPointerType *bufferPointerType = + dyn_cast(bufferPointer->getResultType()); + SpirvLoad *retVal = + spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, + callExpr->getLocStart()); + if (!align) { + QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + bufferType, retVal->getLayoutRule(), llvm::None, &stride); + } + retVal->setAlignment(align); + retVal->setRValue(false); + return retVal; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index eca038527f..0a5ff308c2 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits @@ -491,6 +495,15 @@ class SpirvEmitter : public ASTConsumer { /// Processes the 'lit' intrinsic function. SpirvInstruction *processIntrinsicLit(const CallExpr *); + /// Processes the 'vk::static_pointer_cast' and 'vk_reinterpret_pointer_cast' + /// intrinsic functions. 
+ SpirvInstruction *processIntrinsicPointerCast(const CallExpr *, + bool isStatic); + + /// Processes the vk::BufferPointer intrinsic function 'Get'. + SpirvInstruction * + processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 21aada9e82..6deb11d946 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. @@ -57,6 +61,8 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUndef) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeConstruct) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeExtract) @@ -620,6 +626,28 @@ bool SpirvConstantNull::operator==(const SpirvConstantNull &that) const { astResultType == that.astResultType; } +SpirvConvertPtrToU::SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertPtrToU, spv::Op::OpConvertPtrToU, type, loc, + range), + ptr(ptr) {} + +bool SpirvConvertPtrToU::operator==(const SpirvConvertPtrToU &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && ptr == that.ptr; +} + +SpirvConvertUToPtr::SpirvConvertUToPtr(SpirvInstruction *val, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertUToPtr, spv::Op::OpConvertUToPtr, type, loc, + range), + val(val) {} + +bool SpirvConvertUToPtr::operator==(const SpirvConvertUToPtr &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && val == that.val; +} + SpirvUndef::SpirvUndef(QualType type) : SpirvInstruction(IK_Undef, spv::Op::OpUndef, type, /*SourceLocation*/ {}) {} diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index 10668dc388..f5a864e2b6 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including @@ -1543,6 +1546,20 @@ TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, if (InitSeq.isConstructorInitialization()) Kind = CK_ConstructorConversion; +#ifdef ENABLE_SPIRV_CODEGEN + // Special cases for vk::BufferPointer. 
+ else if (hlsl::IsVKBufferPointerType(SrcExpr.get()->getType()) && + DestType->isIntegerType() && CCK == Sema::CCK_CStyleCast) { + Kind = CK_VK_BufferPointerToIntegral; + SrcExpr = Result; + return TC_Success; + } else if (hlsl::IsVKBufferPointerType(DestType) && + SrcExpr.get()->getType()->isIntegerType()) { + Kind = CK_VK_IntegralToBufferPointer; + SrcExpr = Result; + return TC_Success; + } +#endif else Kind = CK_NoOp; diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index f46bb0ad9f..4723bc93e9 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// /// /// \file @@ -1052,6 +1055,31 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, // corresponding cast expression. if (Exprs.size() == 1 && !ListInitialization) { Expr *Arg = Exprs[0]; +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { + for (auto *ctor : Ty->getAsCXXRecordDecl()->ctors()) { + if (auto *functionType = ctor->getType()->getAs()) { + if (functionType->getNumParams() != 1 || + !functionType->getParamType(0)->isIntegerType()) + continue; + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); + } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, ctor, false, {Arg}, false, false, false, + false, CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); + } + } + } +#endif return BuildCXXFunctionalCastExpr(TInfo, LParenLoc, Arg, RParenLoc); } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index d20daa0ac0..f001cb70d9 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the semantic support for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -195,6 +198,7 @@ enum ArBasicKind { AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -550,6 +554,7 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT, // AR_OBJECT_VK_LITERAL, BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE use recordType BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID use recordType + BPROP_OBJECT, // AR_OBJECT_VK_BUFFER_POINTER use recordType #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1232,6 +1237,9 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, + AR_BASIC_UNKNOWN}; + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. 
const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1287,6 +1295,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1345,6 +1354,7 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_VK_SPIRV_TYPE, AR_OBJECT_VK_SPIRV_OPAQUE_TYPE, AR_OBJECT_VK_INTEGRAL_CONSTANT, AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1451,6 +1461,7 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_VK_LITERAL, 1, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE 1, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + 2, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1599,6 +1610,7 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_LITERAL, {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1763,6 +1775,7 @@ static const char *g_ArBasicTypeNames[] = { "Literal", "ext_type", "ext_result_id", + "BufferPointer", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -2981,6 +2994,7 @@ class HLSLExternalSource : public ExternalSemaSource { ClassTemplateDecl *m_vkIntegralConstantTemplateDecl; ClassTemplateDecl *m_vkLiteralTemplateDecl; + ClassTemplateDecl *m_vkBufferPointerTemplateDecl; // Declarations for Work Graph Output Record types ClassTemplateDecl *m_GroupNodeOutputRecordsTemplateDecl; @@ -3486,6 +3500,25 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.push_back(templateTypeParmDecl); continue; } + if (pArgs[i].uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (TInfo == nullptr) { + TInfo = m_sema->getASTContext().CreateTypeSourceInfo( + m_context->UnsignedIntTy, 0); + } + IdentifierInfo *idT = &context.Idents.get("T"); + IdentifierInfo *idA = &context.Idents.get("A"); + TemplateTypeParmDecl *templateTypeParmDecl = + TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, + 0, idT, TypenameTrue, + ParameterPackFalse); + NonTypeTemplateParmDecl *nonTypeTemplateParmDecl = + NonTypeTemplateParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, + 0, 1, idA, context.UnsignedIntTy, + ParameterPackFalse, TInfo); + templateTypeParmDecl->setDefaultArgument(TInfo); + templateTypeParmDecls.push_back(templateTypeParmDecl); + templateTypeParmDecls.push_back(nonTypeTemplateParmDecl); + } } return templateTypeParmDecls; } @@ -3554,6 +3587,19 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_HIT_OBJECT: paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); break; + case LICOMPTYPE_VK_BUFFER_POINTER: { + const ArBasicKind *match = + std::find(g_ArBasicKindsAsTypes, + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + AR_OBJECT_VK_BUFFER_POINTER); + DXASSERT(match != + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + "otherwise can't find constant in basic kinds"); + size_t index = match - g_ArBasicKindsAsTypes; + paramTypes.push_back( + 
m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); + break; + } default: DXASSERT(false, "Argument type of intrinsic function is not " "supported"); @@ -3932,6 +3978,12 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareTemplateTypeWithHandleInDeclContext( *m_context, m_vkNSDecl, typeName, 1, nullptr); recordDecl->setImplicit(true); + } else if (kind == AR_OBJECT_VK_BUFFER_POINTER) { + if (!m_vkNSDecl) + continue; + recordDecl = DeclareVkBufferPointerType(*m_context, m_vkNSDecl); + recordDecl->setImplicit(true); + m_vkBufferPointerTemplateDecl = recordDecl->getDescribedClassTemplate(); } #endif else if (templateArgCount == 0) { @@ -4044,7 +4096,8 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLExternalSource() : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), - m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), + m_vkLiteralTemplateDecl(nullptr), + m_vkBufferPointerTemplateDecl(nullptr), m_hlslNSDecl(nullptr), m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); @@ -4802,7 +4855,8 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_NODE_OUTPUT_ARRAY: case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: - case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: { + case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: + case AR_OBJECT_VK_BUFFER_POINTER: { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -5318,6 +5372,8 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; + } else if (hlsl::IsVKBufferPointerType(qt)) { + return true; } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -6790,6 +6846,7 @@ bool HLSLExternalSource::MatchArguments( if (pIntrinsic->pArgs[0].qwUsage && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION_2 && pIntrinsic->pArgs[0].uComponentTypeId != INTRIN_COMPTYPE_FROM_NODEOUTPUT) { CAB(pIntrinsic->pArgs[0].uTemplateId < MaxIntrinsicArgs, 0); @@ -6830,7 +6887,8 @@ bool HLSLExternalSource::MatchArguments( // Check template. if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE || - pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION) { + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION || + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { continue; // Already verified that this is available. 
} if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { @@ -6999,6 +7057,14 @@ bool HLSLExternalSource::MatchArguments( } else { pNewType = functionTemplateTypeArg; } + } else if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (i == 0 && + (builtinOp == hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast || + builtinOp == hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast)) { + pNewType = Args[0]->getType(); + } else { + badArgIdx = std::min(badArgIdx, i); + } } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { if (objectElement.isNull()) { @@ -9685,6 +9751,11 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } + // Cast vk::BufferPointer to pointer address. + if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { + return TargetInfo.EltKind == AR_BASIC_UINT64; + } + // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || SourceInfo.EltKind == AR_OBJECT_TEXTURE_BUFFER) && @@ -11533,6 +11604,30 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, + bool isStatic) { + const Expr *argExpr = CE->getArg(0); + QualType srcType = argExpr->getType(); + QualType destType = CE->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + + if (isStatic && srcTypeArg != destTypeArg && + !S.IsDerivedFrom(srcTypeArg, destTypeArg)) { + S.Diags.Report(CE->getExprLoc(), + diag::err_hlsl_vk_static_pointer_cast_type); + return true; + } + + if (hlsl::GetVKBufferPointerAlignment(destType) > + hlsl::GetVKBufferPointerAlignment(srcType)) { + S.Diags.Report(CE->getExprLoc(), diag::err_hlsl_vk_pointer_cast_alignment); + return true; + } + + return false; +} + // Check HLSL call constraints, not fatal to creating the AST. 
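Before the call-constraint entry point below, a concrete illustration of what the new CheckVKBufferPointerCast enforces. This is a small HLSL sketch, not part of the patch; the struct names, alignments, and explicit template arguments are assumptions modeled on the vk::BufferPointer tests later in this series:

  struct Base    { float4 a; };
  struct Derived : Base { float4 b; };

  typedef vk::BufferPointer<Derived, 16> DerivedPtr;
  typedef vk::BufferPointer<Base, 16>    BasePtr;

  void casts(DerivedPtr dp) {
    // Accepted: the source buffer type (Derived) derives from the destination
    // buffer type (Base), and the destination alignment (16) does not exceed
    // the source alignment (16).
    BasePtr bp = vk::static_pointer_cast<Base, 16>(dp);

    // Rejected by the alignment check: the destination claims a stricter
    // alignment (32) than the source guarantees (16).
    // vk::BufferPointer<Base, 32> bad = vk::static_pointer_cast<Base, 32>(dp);
  }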
void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto) { @@ -11551,6 +11646,12 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP_Barrier: CheckBarrierCall(*this, FDecl, TheCall); break; + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, false); + break; + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, true); + break; default: break; } @@ -13801,6 +13902,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; // SPIRV Change Starts + case AttributeList::AT_VKAliasedPointer: { + declAttr = ::new (S.Context) VKAliasedPointerAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + } break; case AttributeList::AT_VKDecorateIdExt: { if (A.getNumArgs() == 0 || !A.getArg(0).is()) { Handled = false; diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl new file mode 100644 index 0000000000..f0f5c54a16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -E main -T cs_6_7 %s | FileCheck %s + +// Bug was causing alignment miss + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer rwbuf; + +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0].Get()); +} + +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 +// CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 +// CHECK: OpStore [[L2]] %int_1 Aligned 4 + + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl new file mode 100644 index 0000000000..fc5b9edad0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +cbuffer cbuf { + [[vk::aliased_pointer]] Globals_p bp; +} + +// CHECK: OpDecorate [[BP0:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP1:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[V4C:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK: [[GS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GS]] +// CHECK: [[TT:%[_0-9A-Za-z]*]] = 
OpTypeStruct [[PGS]] +// CHECK: [[PTT:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[TT]] +// CHECK: [[PFV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[V4FLOAT]] +// CHECK: [[PPGS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGS]] +// CHECK: [[PBV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] + +void f([[vk::aliased_pointer]] Globals_p bp) { +} + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + [[vk::aliased_pointer]] Globals_p bp0 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + [[vk::aliased_pointer]] Globals_p bp1 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + bp0.Get().g_vTestFloat4 = vTest; + f(bp0); + return bp1.Get().g_vTestFloat4; // Returns float4(1.0,0.0,0.0,0.0) +} + +// CHECK: [[GP:%[_0-9A-Za-z]*]] = OpVariable [[PTT]] PushConstant +// CHECK: [[VTEST:%[0-9A-Za-z]*]] = OpVariable [[PFV4FLOAT]] Function +// CHECK: OpStore [[VTEST]] [[V4C]] +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad %_ptr_PhysicalStorageBuffer_Globals_s [[X1]] +// CHECK: OpStore [[BP0]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] +// CHECK: OpStore [[BP1]] [[X4]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] +// CHECK: OpStore [[X7]] [[X5]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 +// CHECK: OpReturnValue [[X10]] + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl new file mode 100644 index 0000000000..992d8b39fd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -spirv -fcgl -T ps_6_0 %s | FileCheck %s + +struct S { + uint u; +}; + +typedef vk::BufferPointer BP; + +struct PC { + BP bp; +}; + +[[vk::push_constant]] PC pc; + +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[S:%[_0-9A-Za-z]*]] = OpTypeStruct [[UINT]] +// CHECK: [[PS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[S]] +// CHECK: [[PU:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} PushConstant + +void main() +{ +// CHECK: [[IN:%[_0-9A-Za-z]*]] = OpVariable +// CHECK: [[OUT:%[_0-9A-Za-z]*]] = OpVariable + uint u0, u1; + +// CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] +// CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] +// CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] +// CHECK: OpStore [[OUT]] [[X5]] + InterlockedExchange(pc.bp.Get().u, u0, u1); +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl new file mode 100644 index 
0000000000..86cf48c41e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + float a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + float tmp = buffer.Get().a; + buffer.Get().a = tmp; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl new file mode 100644 index 0000000000..09585a7664 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Globals_s { + float4 a; +}; + +typedef vk::BufferPointer Globals_p; +typedef vk::BufferPointer Globals_pp; + +[[vk::push_constant]] +Globals_pp bda; + +[numthreads(1, 1, 1)] +void main() { + float4 r = bda.Get().Get().a; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl new file mode 100644 index 0000000000..e803b5b754 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + uint data = buffer.Get(); + buffer.Get() = data; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl new file mode 100644 index 0000000000..1029aa7f2e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl @@ -0,0 +1,18 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + buffer.Get() = 1; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl new file mode 100644 index 0000000000..62bdb7f3cb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl @@ -0,0 +1,26 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +//[[vk::push_constant]] +//BufferContent buffer; + +RWStructuredBuffer rwbuf; + +// Wrong type in the parameter. 
+void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0]); +} + +// CHECK: no matching function for call to 'foo' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl new file mode 100644 index 0000000000..a89b286edf --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl @@ -0,0 +1,23 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer buf; + +void foo(const BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + static BufferContent bcs = buf[0]; + static BufferBuffer bbs = (BufferContent)bcs; +} + +// CHECK: cannot initialize a variable of type 'BufferPointer' with an lvalue of type 'BufferPointer' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl new file mode 100644 index 0000000000..71fee1a795 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpCapability PhysicalStorageBufferAddresses +// CHECK: OpExtension "SPV_KHR_physical_storage_buffer" +// CHECK: OpMemoryModel PhysicalStorageBuffer64 GLSL450 +// CHECK: OpEntryPoint Fragment [[MAIN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +// Forward declaration +typedef struct block_s block_t; +typedef vk::BufferPointer block_p; + +struct block_s +{ + float4 x; + block_p next; +}; + +struct TestPushConstant_t +{ + block_p root; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: OpDecorate [[GP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[COPY1:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpDecorate [[COPY2:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpMemberDecorate [[BLOCK:%[_0-9A-Za-z]*]] 1 Offset 16 +// CHECK: OpTypeForwardPointer [[PBLOCK:%[_0-9A-Za-z]*]] PhysicalStorageBuffer +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[ULONG:%[_0-9A-Za-z]*]] = OpTypeInt 64 0 +// CHECK: [[UL0:%[_0-9A-Za-z]*]] = OpConstant [[ULONG]] 0 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F0]] [[F0]] [[F0]] [[F0]] +// CHECK: [[BLOCK]] = OpTypeStruct [[V4FLOAT]] [[PBLOCK]] +// CHECK: [[PBLOCK]] = OpTypePointer PhysicalStorageBuffer [[BLOCK]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PBLOCK]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPBLOCK0:%[_0-9A-Za-z]*]] = OpTypePointer Function %_ptr_PhysicalStorageBuffer_block_s +// CHECK: [[PPBLOCK1:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PBLOCK]] +// CHECK: [[PPBLOCK2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[PBLOCK]] +// CHECK: [[BOOL:%[_0-9A-Za-z]*]] = OpTypeBool +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK: [[OUT]] 
= OpVariable [[PV4FLOAT1]] Output + +[numthreads(1,1,1)] +float4 MainPs(void) : SV_Target0 +{ + if (__has_feature(hlsl_vk_buffer_pointer)) { + [[vk::aliased_pointer]] block_p g_p = + vk::static_pointer_cast(g_PushConstants.root); + g_p = g_p.Get().next; + uint64_t addr = (uint64_t)g_p; + block_p copy1 = block_p(addr); + block_p copy2 = block_p(copy1); + if (addr == 0) // Null pointer test + return float4(0.0,0.0,0.0,0.0); + return g_p.Get().x; + } + return float4(0.0,0.0,0.0,0.0); +} + +// CHECK: [[MAIN]] = OpFunction +// CHECK-NEXT: OpLabel +// CHECK-NEXT: [[RESULT:%[_0-9A-Za-z]*]] = OpFunctionCall [[V4FLOAT]] [[FUN:%[_0-9A-Za-z]*]] +// CHECK: OpStore [[OUT]] [[RESULT]] +// CHECK: OpFunctionEnd +// CHECK: [[FUN]] = OpFunction [[V4FLOAT]] +// CHECK: [[GP]] = OpVariable [[PPBLOCK0]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] +// CHECK: OpStore [[GP]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: OpStore [[GP]] [[X5]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] +// CHECK: OpStore [[ADDR:%[_0-9A-Za-z]*]] [[X7]] +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PBLOCK]] [[X8]] +// CHECK: OpStore [[COPY1]] [[X9]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[COPY1]] +// CHECK: OpStore [[COPY2]] [[X10]] +// CHECK: [[X11:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X12:%[_0-9A-Za-z]*]] = OpIEqual %bool [[X11]] [[UL0]] +// CHECK: OpBranchConditional [[X12]] [[IF_TRUE:%[_0-9A-Za-z]*]] [[IF_MERGE:%[_0-9A-Za-z]*]] +// CHECK: [[IF_TRUE]] = OpLabel +// CHECK: OpReturnValue [[CV4FLOAT]] +// CHECK: [[IF_MERGE]] = OpLabel +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] +// CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 +// CHECK: OpReturnValue [[X15]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl new file mode 100644 index 0000000000..c7d6f0ed2b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = 
OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 +// CHECK: OpStore [[OUT]] [[X4]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl new file mode 100644 index 0000000000..b2efd02cbd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK-DAG: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: OpStore [[OUT]] [[CV4FLOAT]] +// CHECK: OpFunctionEnd diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 0ca5b0716b..55c3643d95 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ 
-1,6 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// // See hctdb.py for the implementation of intrinsic file processing. // // Intrinsic declarations are grouped into namespaces that @@ -393,7 +396,13 @@ void [[]] RawBufferStore(in u64 addr, in $funcT value); void [[]] RawBufferStore(in u64 addr, in $funcT value, in uint alignment); void [[]] ext_execution_mode(in uint mode, ...); void [[]] ext_execution_mode_id(in uint mode, ...); +$funcT2 [[]] static_pointer_cast(in VkBufferPointer ptr); +$funcT2 [[]] reinterpret_pointer_cast(in VkBufferPointer ptr); + +} namespace +namespace BufferPointerMethods { +$classT [[ro]] GetBufferContents(); } namespace // SPIRV Change Ends @@ -1147,4 +1156,3 @@ $classT [[]] SubpassLoad(in int sample) : subpassinputms_load; } namespace // SPIRV Change Ends - diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 05bc7d472d..5eb35fb52a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,5 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. +# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +# All rights reserved. ############################################################################### # DXIL information. # ############################################################################### @@ -8584,6 +8586,7 @@ def __init__(self, intrinsic_defs, opcode_data): "GroupNodeOutputRecords": "LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS", "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", "DxHitObject": "LICOMPTYPE_HIT_OBJECT", + "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} @@ -8645,7 +8648,8 @@ def load_intrinsics(self, intrinsic_defs): (?:RW)?(?:Texture\w*|ByteAddressBuffer) | acceleration_struct | ray_desc | RayQuery | DxHitObject | Node\w* | RWNode\w* | EmptyNode\w* | - AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* + AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* | + VkBufferPointer $)""", flags=re.VERBOSE, ) @@ -8697,6 +8701,10 @@ def process_arg(desc, idx, done_args, intrinsic_name): template_id = "-3" component_id = "0" type_name = "void" + elif type_name == "$funcT2": + template_id = "-4" + component_id = "0" + type_name = "void" elif type_name == "...": assert idx != 0, "'...' 
can only be used in the parameter list" template_id = "-2" @@ -8825,6 +8833,8 @@ def do_object(m): template_id = "INTRIN_TEMPLATE_VARARGS" elif template_id == "-3": template_id = "INTRIN_TEMPLATE_FROM_FUNCTION" + elif template_id == "-4": + template_id = "INTRIN_TEMPLATE_FROM_FUNCTION_2" if component_id == "-1": component_id = "INTRIN_COMPTYPE_FROM_TYPE_ELT0" if component_id == "-2": diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index 4c85069488..c4527277cd 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 360, + "Num_Intrinsics": 363, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -360,6 +360,9 @@ "MOP_InterlockedUMax": 356, "MOP_InterlockedUMin": 357, "MOP_DxHitObject_MakeNop": 358, - "IOP_DxMaybeReorderThread": 359 + "IOP_DxMaybeReorderThread": 359, + "IOP_Vkreinterpret_pointer_cast": 360, + "IOP_Vkstatic_pointer_cast": 361, + "MOP_GetBufferContents": 362 } } From 2b1c2e640dae09adf1cb2dd52bc5ce860d73b02b Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:09:22 -0700 Subject: [PATCH 63/88] Fix typo in exec tests comment (#7299) Keep seeing this comment typo and wanted to rectify. --- .../unittests/HLSLExec/ExecutionTest.cpp | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 91b42f6b79..6db27d7a41 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -5632,7 +5632,7 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test is creating the resource to run + // this callback is called when the test is creating the resource to run // the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(Name); @@ -6999,7 +6999,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7067,7 +7067,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7157,7 +7157,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7234,7 +7234,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is 
called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7314,7 +7314,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7424,7 +7424,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7494,7 +7494,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -7554,7 +7554,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -7619,7 +7619,7 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -7707,7 +7707,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -7777,7 +7777,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -7869,7 +7869,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -7948,7 +7948,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -8016,7 +8016,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -8091,7 +8091,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -8187,7 +8187,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -8264,7 +8264,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -8363,7 +8363,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -8948,7 +8948,7 @@ TEST_F(ExecutionTest, DotTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "DotOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SDotOp")); @@ -9240,7 +9240,7 @@ TEST_F(ExecutionTest, Msad4Test) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "Msad4", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SMsad4")); @@ -9342,7 +9342,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -9455,7 +9455,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -9883,7 +9883,7 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( ++maskIndex) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "WaveIntrinsicsOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); @@ -12609,7 +12609,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "HelperLaneTestNoWave", - // this callbacked is called when the test is creating the resource to + // this callback is called when the test is creating the resource to // run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); From 3b1a29bf89520c0159669487feaaac5a98ab8ed5 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 2 Apr 2025 16:19:43 -0700 Subject: [PATCH 64/88] [OMM] Add DXR Entry point test, non-library target test, conforming tests to spec. (#7281) This PR adds 2 tests that were mentioned in the spec that haven't yet been added. 1. 
A test that makes sure that restricted flags are diagnosed in DXR entry shaders. 2. A test that makes sure that no diagnostics are emitted when a restricted flag is used for a subobject in a non-library shader target. Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7282 --- .../SemaHLSL/rayquery-omm-DXR-entry-point.hlsl | 17 +++++++++++++++++ .../test/SemaHLSL/rayquery-omm-type-diag.hlsl | 4 ++-- .../raytracingpipelineconfig1-no-errors.hlsl | 12 ++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl create mode 100644 tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl new file mode 100644 index 0000000000..722187cf43 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_3 -validator-version 1.8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +RaytracingAccelerationStructure RTAS; +// DXR entry to test that restricted flags are diagnosed. +[shader("raygeneration")] +void main(void) { + RayDesc rayDesc; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} + RayQuery rayQuery; + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl index 981788a688..5e484d193e 100644 --- a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -1,5 +1,5 @@ -// RUN: %dxc -T vs_6_9 -E RayQueryTests -verify %s -// RUN: %dxc -T vs_6_5 -E RayQueryTests2 -verify %s +// RUN: %dxc -T vs_6_9 -verify %s +// RUN: %dxc -T vs_6_5 -verify %s // validate 2nd template argument flags // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl new file mode 100644 index 0000000000..272a46a87e --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T ps_6_0 -verify %s + +// expected-no-diagnostics +// No diagnostic is expected because this is a non-library target, +// and SubObjects are ignored on non-library targets.
+ +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +[shader("pixel")] +int main(int i : INDEX) : SV_Target { + return 1; +} From 65564102a78a99b191228cc88ef4ccee2f987783 Mon Sep 17 00:00:00 2001 From: Cassandra Beckley Date: Wed, 2 Apr 2025 18:38:02 -0700 Subject: [PATCH 65/88] [SPIR-V] Implement QuadAny and QuadAll (#7266) If `"SPV_KHR_quad_control"` can be used, uses `OpGroupNonUniformQuadAnyKHR` and `OpGroupNonUniformQuadAllKHR`. If not, falls back to constructing the value using `OpGroupNonUniformQuadSwap`. Fixes #7247 --- docs/SPIR-V.rst | 8 +++ .../include/clang/SPIRV/FeatureManager.h | 1 + .../clang/include/clang/SPIRV/SpirvBuilder.h | 2 +- .../include/clang/SPIRV/SpirvInstruction.h | 8 +-- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 3 ++ tools/clang/lib/SPIRV/EmitVisitor.cpp | 7 +-- tools/clang/lib/SPIRV/FeatureManager.cpp | 3 ++ tools/clang/lib/SPIRV/SpirvBuilder.cpp | 2 +- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 51 +++++++++++++++++++ tools/clang/lib/SPIRV/SpirvEmitter.h | 4 ++ tools/clang/lib/SPIRV/SpirvInstruction.cpp | 9 +++- .../test/CodeGenSPIRV/sm6.quad-any-all.hlsl | 41 +++++++++++++++ 12 files changed, 130 insertions(+), 9 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 899b587492..b5e9c05079 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -320,6 +320,7 @@ Supported extensions * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls * SPV_NV_shader_subgroup_partitioned +* SPV_KHR_quad_control Vulkan specific attributes -------------------------- @@ -4008,6 +4009,8 @@ Quad ``QuadReadAcrossX()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossY()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadLaneAt()`` ``OpGroupNonUniformQuadBroadcast`` +Quad ``QuadAny()`` ``OpGroupNonUniformQuadAnyKHR`` +Quad ``QuadAll()`` ``OpGroupNonUniformQuadAllKHR`` N/A ``WaveMatch()`` ``OpGroupNonUniformPartitionNV`` Multiprefix ``WaveMultiPrefixSum()`` ``OpGroupNonUniform*Add`` ``PartitionedExclusiveScanNV`` Multiprefix ``WaveMultiPrefixProduct()`` ``OpGroupNonUniform*Mul`` ``PartitionedExclusiveScanNV`` @@ -4016,6 +4019,11 @@ Multiprefix ``WaveMultiPrefixBitOr()`` ``OpGroupNonUniformLogicalOr`` ` Multiprefix ``WaveMultiPrefixBitXor()`` ``OpGroupNonUniformLogicalXor`` ``PartitionedExclusiveScanNV`` ============= ============================ =================================== ============================== +``QuadAny`` and ``QuadAll`` will use the ``OpGroupNonUniformQuadAnyKHR`` and +``OpGroupNonUniformQuadAllKHR`` instructions if the ``SPV_KHR_quad_control`` +extension is enabled. If it is not, they will fall back to constructing the +value using multiple calls to ``OpGroupNonUniformQuadBroadcast``. 
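For illustration, a minimal HLSL usage sketch (not taken from this patch; the buffer name, thread-group size, and output values are assumptions made for the example):

  RWStructuredBuffer<float> Output;

  [numthreads(8, 8, 1)]
  void main(uint3 id : SV_DispatchThreadID) {
    // Each lane evaluates its own condition; the quad intrinsics then
    // reduce that condition across the 2x2 quad the lane belongs to.
    bool laneCond = (id.x % 2) == 0;
    bool anyInQuad = QuadAny(laneCond); // true if any of the four lanes passed
    bool allInQuad = QuadAll(laneCond); // true only if all four lanes passed
    Output[id.y * 8 + id.x] = allInQuad ? 2.0 : (anyInQuad ? 1.0 : 0.0);
  }

When compiled for SPIR-V with SPV_KHR_quad_control enabled, the two calls are expected to lower to the new opcodes listed above; without the extension, the OpGroupNonUniformQuadSwap-based fallback in the emitter is used instead.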
+ The Implicit ``vk`` Namespace ============================= diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 8a9755ae79..3c1871df37 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -64,6 +64,7 @@ enum class Extension { KHR_maximal_reconvergence, KHR_float_controls, NV_shader_subgroup_partitioned, + KHR_quad_control, Unknown, }; diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index ed2cb3b6fd..5e03d1ef96 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -242,7 +242,7 @@ class SpirvBuilder { /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V /// opcode. SpirvGroupNonUniformOp *createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation, llvm::Optional groupOp = llvm::None); diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7a7ad3aa4d..f49a295610 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -1566,7 +1566,8 @@ class SpirvFunctionCall : public SpirvInstruction { /// \brief OpGroupNonUniform* instructions class SpirvGroupNonUniformOp : public SpirvInstruction { public: - SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, spv::Scope scope, + SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, + llvm::Optional scope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional group); @@ -1580,7 +1581,8 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { bool invokeVisitor(Visitor *v) override; - spv::Scope getExecutionScope() const { return execScope; } + bool hasExecutionScope() const { return execScope.hasValue(); } + spv::Scope getExecutionScope() const { return execScope.getValue(); } llvm::ArrayRef getOperands() const { return operands; } @@ -1598,7 +1600,7 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { } private: - spv::Scope execScope; + llvm::Optional execScope; llvm::SmallVector operands; llvm::Optional groupOp; }; diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 6fd0c6d950..24dfdc2e9a 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -887,6 +887,9 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { addCapability(spv::Capability::InterpolationFunction); + addExtensionAndCapabilitiesIfEnabled(Extension::KHR_quad_control, + {spv::Capability::QuadControlKHR}); + return true; } diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 9c0368f7a1..eb00f59632 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -1134,9 +1134,10 @@ bool EmitVisitor::visit(SpirvGroupNonUniformOp *inst) { initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, static_cast(inst->getExecutionScope())), - context.getUIntType(32), /* isSpecConst */ false)); + if (inst->hasExecutionScope()) + curInst.push_back(typeHandler.getOrCreateConstantInt( + llvm::APInt(32, 
static_cast(inst->getExecutionScope())), + context.getUIntType(32), /* isSpecConst */ false)); if (inst->hasGroupOp()) curInst.push_back(static_cast(inst->getGroupOp())); for (auto *operand : inst->getOperands()) diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index a8ee1de000..7fb449fee9 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -226,6 +226,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_float_controls", Extension::KHR_float_controls) .Case("SPV_NV_shader_subgroup_partitioned", Extension::NV_shader_subgroup_partitioned) + .Case("SPV_KHR_quad_control", Extension::KHR_quad_control) .Default(Extension::Unknown); } @@ -297,6 +298,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_float_controls"; case Extension::NV_shader_subgroup_partitioned: return "SPV_NV_shader_subgroup_partitioned"; + case Extension::KHR_quad_control: + return "SPV_KHR_quad_control"; default: break; } diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 6b3f43fc77..689fc0715f 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -453,7 +453,7 @@ SpirvSpecConstantBinaryOp *SpirvBuilder::createSpecConstantBinaryOp( } SpirvGroupNonUniformOp *SpirvBuilder::createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional groupOp) { assert(insertPoint && "null insert point"); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 7cc84fa2fc..eed4f6369f 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -9271,6 +9271,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_QuadReadLaneAt: retVal = processWaveQuadWideShuffle(callExpr, hlslOpcode); break; + case hlsl::IntrinsicOp::IOP_QuadAny: + case hlsl::IntrinsicOp::IOP_QuadAll: + retVal = processWaveQuadAnyAll(callExpr, hlslOpcode); + break; case hlsl::IntrinsicOp::IOP_abort: case hlsl::IntrinsicOp::IOP_GetRenderTargetSampleCount: case hlsl::IntrinsicOp::IOP_GetRenderTargetSamplePosition: { @@ -10233,6 +10237,53 @@ SpirvEmitter::processWaveQuadWideShuffle(const CallExpr *callExpr, opcode, retType, spv::Scope::Subgroup, {value, target}, srcLoc); } +SpirvInstruction *SpirvEmitter::processWaveQuadAnyAll(const CallExpr *callExpr, + hlsl::IntrinsicOp op) { + // Signatures: + // bool QuadAny(bool localValue) + // bool QuadAll(bool localValue) + assert(callExpr->getNumArgs() == 1); + assert(op == hlsl::IntrinsicOp::IOP_QuadAny || + op == hlsl::IntrinsicOp::IOP_QuadAll); + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation", + callExpr->getExprLoc()); + + auto *predicate = doExpr(callExpr->getArg(0)); + const auto srcLoc = callExpr->getExprLoc(); + + if (!featureManager.isExtensionEnabled(Extension::KHR_quad_control)) { + // We can't use QuadAny/QuadAll, so implement them using QuadSwap. We + // will read the value at each quad invocation, then combine them. + + spv::Op reducer = op == hlsl::IntrinsicOp::IOP_QuadAny + ? 
spv::Op::OpLogicalOr + : spv::Op::OpLogicalAnd; + + SpirvInstruction *result = predicate; + + for (size_t i = 0; i < 3; i++) { + SpirvInstruction *invocationValue = spvBuilder.createGroupNonUniformOp( + spv::Op::OpGroupNonUniformQuadSwap, astContext.BoolTy, + spv::Scope::Subgroup, + {predicate, spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, i))}, + srcLoc); + result = spvBuilder.createBinaryOp(reducer, astContext.BoolTy, result, + invocationValue, srcLoc); + } + + return result; + } + + spv::Op opcode = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpGroupNonUniformQuadAnyKHR + : spv::Op::OpGroupNonUniformQuadAllKHR; + + return spvBuilder.createGroupNonUniformOp(opcode, astContext.BoolTy, + llvm::Optional(), + {predicate}, srcLoc); +} + SpirvInstruction * SpirvEmitter::processWaveActiveAllEqual(const CallExpr *callExpr) { assert(callExpr->getNumArgs() == 1); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 0a5ff308c2..79d2c43c35 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -670,6 +670,10 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op); + /// Processes SM6.7 quad any/all. + SpirvInstruction *processWaveQuadAnyAll(const CallExpr *, + hlsl::IntrinsicOp op); + /// Generates the Spir-V instructions needed to implement the given call to /// WaveActiveAllEqual. Returns a pointer to the instruction that produces the /// final result. diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 6deb11d946..f41de03adc 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -705,7 +705,7 @@ SpirvFunctionCall::SpirvFunctionCall(QualType resultType, SourceLocation loc, function(fn), args(argsVec.begin(), argsVec.end()) {} SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope scope, + spv::Op op, QualType resultType, llvm::Optional scope, llvm::ArrayRef operandsVec, SourceLocation loc, llvm::Optional group) : SpirvInstruction(IK_GroupNonUniformOp, op, resultType, loc), @@ -737,6 +737,8 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( case spv::Op::OpGroupNonUniformLogicalAnd: case spv::Op::OpGroupNonUniformLogicalOr: case spv::Op::OpGroupNonUniformLogicalXor: + case spv::Op::OpGroupNonUniformQuadAnyKHR: + case spv::Op::OpGroupNonUniformQuadAllKHR: assert(operandsVec.size() == 1); break; @@ -768,6 +770,11 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( assert(false && "Unexpected Group non-uniform opcode"); break; } + + if (op != spv::Op::OpGroupNonUniformQuadAnyKHR && + op != spv::Op::OpGroupNonUniformQuadAllKHR) { + assert(scope.hasValue()); + } } SpirvImageOp::SpirvImageOp( diff --git a/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl new file mode 100644 index 0000000000..fb9f6e0d76 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fspv-extension=SPV_KHR_16bit_storage -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,NOQUAD +// RUN: not %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.0 -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefixes=ERROR + +// CHECK: ; Version: 1.3 + +// QUAD: 
OpCapability QuadControlKHR
+// QUAD: OpExtension "SPV_KHR_quad_control"
+
+RWStructuredBuffer values;
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+  uint outIdx = (id.y * 8) + id.x;
+
+// CHECK: [[val1:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}}
+// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAnyKHR %bool [[val1]]
+
+// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_0
+// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalOr %bool [[val1]] [[inv0]]
+// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_1
+// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalOr %bool [[or0]] [[inv1]]
+// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_2
+// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalOr %bool [[or1]] [[inv2]]
+
+// ERROR: 27:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use
+  values[outIdx].x = QuadAny(outIdx % 4 == 0) ? 1.0 : 2.0;
+
+// CHECK: [[val2:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}}
+// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAllKHR %bool [[val2]]
+
+// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_0
+// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalAnd %bool [[val2]] [[inv0]]
+// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_1
+// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalAnd %bool [[or0]] [[inv1]]
+// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_2
+// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalAnd %bool [[or1]] [[inv2]]
+
+// ERROR: 40:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use
+  values[outIdx].y = QuadAll(outIdx % 2 == 0) ? 3.0 : 4.0;
+}

From 90102440f822dde23d1ee1e6b2970db2aaf1f849 Mon Sep 17 00:00:00 2001
From: Urs Hanselmann <6864721+urshanselmann@users.noreply.github.com>
Date: Thu, 3 Apr 2025 15:55:41 +0200
Subject: [PATCH 66/88] Add UUID compiler extension check on Clang (#7286)

Fixes #7248

Fix Clang Compilation on Linux without Microsoft extensions enabled.

## Rationale

Clang support depends on the `-fms-extensions` compiler flag.
[[1]](https://clang.llvm.org/docs/UsersManual.html#microsoft-extensions)
If enabled, the `_MSC_EXTENSIONS` macro is defined.
[[2]](https://github.com/llvm/llvm-project/blob/19a319667b567a26a20f9829a0ae7e6a5c259cba/clang/lib/Basic/Targets/OSTargets.cpp#L248)
---
 include/dxc/WinAdapter.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/dxc/WinAdapter.h b/include/dxc/WinAdapter.h
index b8c6646871..d02ad1ac38 100644
--- a/include/dxc/WinAdapter.h
+++ b/include/dxc/WinAdapter.h
@@ -51,7 +51,8 @@
 #define _countof(a) (sizeof(a) / sizeof(*(a)))

 // If it is GCC, there is no UUID support and we must emulate it.
-#ifndef __clang__
+// Clang support depends on the -fms-extensions compiler flag.
+#if !defined(__clang__) || !defined(_MSC_EXTENSIONS)
 #define __EMULATE_UUID 1
 #endif // __clang__

From 6a73640b91f823c4b9d9cc2c89eb2d3d93b0377f Mon Sep 17 00:00:00 2001
From: Chris B
Date: Thu, 3 Apr 2025 08:56:07 -0500
Subject: [PATCH 67/88] Update DXC's CONTRIBUTING file (#7265)

This change seeks to address some recent questions about how the LLVM
Coding Standards are applied in DXC.
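As a side note on the WinAdapter.h change in #7286 above: the following is a minimal, self-contained sketch (not taken from any patch in this series) of how that preprocessor gate behaves. It assumes, per the rationale quoted in that patch, that clang defines `_MSC_EXTENSIONS` only when `-fms-extensions` is enabled; the `main` function and its messages are invented purely for illustration.

```cpp
// Hypothetical illustration of the WinAdapter.h gate from #7286; not part of
// the patch itself. With GCC, or with a clang build that does not enable
// Microsoft extensions, _MSC_EXTENSIONS is absent and UUID emulation is used.
#include <cstdio>

#if !defined(__clang__) || !defined(_MSC_EXTENSIONS)
#define __EMULATE_UUID 1
#endif

int main() {
#ifdef __EMULATE_UUID
  std::puts("__EMULATE_UUID is defined: UUIDs are emulated");
#else
  std::puts("__EMULATE_UUID is not defined: native __uuidof support is assumed");
#endif
  return 0;
}
```

Compiling the sketch with and without `-fms-extensions` on a toolchain where that flag defines `_MSC_EXTENSIONS` flips between the two branches, which is exactly the distinction the one-line WinAdapter.h change relies on.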
--------- Co-authored-by: Ashley Coleman --- CONTRIBUTING.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 233211f150..840b4f0f17 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,10 +40,32 @@ Before submitting a feature or substantial code contribution please discuss it w ### Coding guidelines -The coding, style, and general engineering guidelines follow those described in the docs/CodingStandards.rst. For additional guidelines in code specific to HLSL, see the docs/HLSLChanges.rst file. +The coding, style, and general engineering guidelines follow those described in the [LLVM Coding Standards](docs/CodingStandards.rst). For additional guidelines in code specific to HLSL, see the [HLSL Changes](docs/HLSLChanges.rst) docs. DXC has adopted a clang-format requirement for all incoming changes to C and C++ files. PRs to DXC should have the *changed code* clang formatted to the LLVM style, and leave the remaining portions of the file unchanged. This can be done using the `git-clang-format` tool or IDE driven workflows. A GitHub action will run on all PRs to validate that the change is properly formatted. +#### Applying LLVM Standards + +All new code contributed to DXC should follow the LLVM coding standards. + +Note that the LLVM Coding Standards have a golden rule: + +> **If you are extending, enhancing, or bug fixing already implemented code, use the style that is already being used so that the source is uniform and easy to follow.** + +The golden rule should continue to be applied to places where DXC is self-consistent. A good example is DXC's common use of `PascalCase` instead of `camelCase` for APIs in some parts of the HLSL implementation. In any place where DXC is not self-consistent new code should follow the LLVM Coding Standard. + +A good secondary rule to follow is: + +> **When in doubt, follow LLVM.** + +Adopting LLVM's coding standards provides a consistent set of rules and guidelines to hold all contributions to. This allows patch authors to clearly understand the expectations placed on contributions, and allows reviewers to have a bar to measure contributions against. Aligning with LLVM by default ensures the path of least resistance for everyone. + +Since many of the LLVM Coding Standards are not enforced automatically we rely on code reviews to provide feedback and ensure contributions align with the expected coding standards. Since we rely on reviewers for enforcement and humans make mistakes, please keep in mind: + +> **Code review is a conversation.** + +It is completely reasonable for a patch author to question feedback and provide additional context about why something was done the way it was. Reviewers often see narrow slices in diffs rather than the full context of a file or part of the compiler, so they may not always provide perfect feedback. This is especially true with the application of the "golden rule" since it depends on understanding a wider context. 
+ ### Documenting Pull Requests Pull request descriptions should have the following format: From c9170e5fc5d39d472af1d5e5c2cf368a4501bc1a Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 3 Apr 2025 12:42:15 -0400 Subject: [PATCH 68/88] Update SPIRV-Tools (#7303) Fixes #7181 --- external/SPIRV-Tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 393d5c7df1..4bd1536ed7 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 393d5c7df150532045c50affffea2df22e8231b0 +Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b From 85f34327588ded72e949ed438d85653576f144e4 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Thu, 3 Apr 2025 14:44:09 -0600 Subject: [PATCH 69/88] Fixes non-SPIR-V build, broken by PR #7163 ([SPIRV] Implements vk::BufferPointer proposal) (#7306) #ifdef ENABLE_SPIRV_CODEGEN was omitted in several places. --- include/dxc/dxcapi.internal.h | 5 ++++- tools/clang/lib/Sema/SemaHLSL.cpp | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index f183bb6cf0..d37054194b 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -132,9 +132,12 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, +#ifdef ENABLE_SPIRV_CODEGEN LICOMPTYPE_VK_BUFFER_POINTER = 52, - LICOMPTYPE_COUNT = 53 +#else + LICOMPTYPE_COUNT = 52 +#endif }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index f001cb70d9..f9e011f8d4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -1237,8 +1237,10 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, AR_BASIC_UNKNOWN}; +#ifdef ENABLE_SPIRV_CODEGEN static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, AR_BASIC_UNKNOWN}; +#endif // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. 
const ArBasicKind *g_LegalIntrinsicCompTypes[] = { @@ -1295,7 +1297,9 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT - g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#ifdef ENABLE_SPIRV_CODEGEN + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#endif }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -3587,6 +3591,7 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_HIT_OBJECT: paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); break; +#ifdef ENABLE_SPIRV_CODEGEN case LICOMPTYPE_VK_BUFFER_POINTER: { const ArBasicKind *match = std::find(g_ArBasicKindsAsTypes, @@ -3600,6 +3605,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); break; } +#endif default: DXASSERT(false, "Argument type of intrinsic function is not " "supported"); @@ -4856,7 +4862,10 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: - case AR_OBJECT_VK_BUFFER_POINTER: { +#ifdef ENABLE_SPIRV_CODEGEN + case AR_OBJECT_VK_BUFFER_POINTER: +#endif + { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -5372,8 +5381,10 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; +#ifdef ENABLE_SPIRV_CODEGEN } else if (hlsl::IsVKBufferPointerType(qt)) { return true; +#endif } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -9751,10 +9762,12 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } +#ifdef ENABLE_SPIRV_CODEGEN // Cast vk::BufferPointer to pointer address. if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { return TargetInfo.EltKind == AR_BASIC_UINT64; } +#endif // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || @@ -11604,6 +11617,7 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, bool isStatic) { const Expr *argExpr = CE->getArg(0); @@ -11627,6 +11641,7 @@ static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, return false; } +#endif // Check HLSL call constraints, not fatal to creating the AST. 
void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
@@ -11646,12 +11661,14 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
   case hlsl::IntrinsicOp::IOP_Barrier:
     CheckBarrierCall(*this, FDecl, TheCall);
     break;
+#ifdef ENABLE_SPIRV_CODEGEN
   case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast:
     CheckVKBufferPointerCast(*this, FDecl, TheCall, false);
     break;
   case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast:
     CheckVKBufferPointerCast(*this, FDecl, TheCall, true);
     break;
+#endif
   default:
     break;
   }

From e50f599ff302a0ecf08146f6986c738dc4149abb Mon Sep 17 00:00:00 2001
From: Greg Roth
Date: Fri, 4 Apr 2025 09:44:57 -0700
Subject: [PATCH 70/88] [NFC] Standardize DxilValidation variable capitalization (#7307)

Capitalize all the variables and rename a few in DxilValidation.cpp in
keeping with
https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly

Because this file could easily be mistaken for one covered by the golden rule:
https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly
it is at serious risk of receiving changes that get hung up on requirements to
follow the LLVM coding guidelines. This change brings the cases where variable
capitalization was not in line with the coding standards into conformance, to
avoid such pitfalls in the future.
---
 lib/DxilValidation/DxilValidation.cpp | 3288 ++++++++++++-------------
 1 file changed, 1644 insertions(+), 1644 deletions(-)

diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index cac074adc3..97bde6ca24 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -65,8 +65,8 @@ using std::vector;
 namespace hlsl {

 // PrintDiagnosticContext methods.
-PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &printer) - : m_Printer(printer), m_errorsFound(false), m_warningsFound(false) {} +PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &Printer) + : m_Printer(Printer), m_errorsFound(false), m_warningsFound(false) {} bool PrintDiagnosticContext::HasErrors() const { return m_errorsFound; } bool PrintDiagnosticContext::HasWarnings() const { return m_warningsFound; } @@ -97,68 +97,68 @@ struct PSExecutionInfo { }; static unsigned ValidateSignatureRowCol(Instruction *I, - DxilSignatureElement &SE, Value *rowVal, - Value *colVal, EntryStatus &Status, + DxilSignatureElement &SE, Value *RowVal, + Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (ConstantInt *constRow = dyn_cast(rowVal)) { - unsigned row = constRow->getLimitedValue(); - if (row >= SE.GetRows()) { - std::string range = std::string("0~") + std::to_string(SE.GetRows()); + if (ConstantInt *ConstRow = dyn_cast(RowVal)) { + unsigned Row = ConstRow->getLimitedValue(); + if (Row >= SE.GetRows()) { + std::string Range = std::string("0~") + std::to_string(SE.GetRows()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Row", range, std::to_string(row)}); + {"Row", Range, std::to_string(Row)}); } } - if (!isa(colVal)) { - // col must be const + if (!isa(ColVal)) { + // Col must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"Col", "LoadInput/StoreOutput"}); return 0; } - unsigned col = cast(colVal)->getLimitedValue(); + unsigned Col = cast(ColVal)->getLimitedValue(); - if (col > SE.GetCols()) { - std::string range = std::string("0~") + std::to_string(SE.GetCols()); + if (Col > SE.GetCols()) { + std::string Range = std::string("0~") + std::to_string(SE.GetCols()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Col", range, std::to_string(col)}); + {"Col", Range, std::to_string(Col)}); } else { if (SE.IsOutput()) - Status.outputCols[SE.GetID()] |= 1 << col; + Status.outputCols[SE.GetID()] |= 1 << Col; if (SE.IsPatchConstOrPrim()) - Status.patchConstOrPrimCols[SE.GetID()] |= 1 << col; + Status.patchConstOrPrimCols[SE.GetID()] |= 1 << Col; } - return col; + return Col; } static DxilSignatureElement * -ValidateSignatureAccess(Instruction *I, DxilSignature &sig, Value *sigID, - Value *rowVal, Value *colVal, EntryStatus &Status, +ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, + Value *RowVal, Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (!isa(sigID)) { + if (!isa(SigId)) { // inputID must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"SignatureID", "LoadInput/StoreOutput"}); return nullptr; } - unsigned SEIdx = cast(sigID)->getLimitedValue(); - if (sig.GetElements().size() <= SEIdx) { + unsigned SEIdx = cast(SigId)->getLimitedValue(); + if (Sig.GetElements().size() <= SEIdx) { ValCtx.EmitInstrError(I, ValidationRule::InstrOpConstRange); return nullptr; } - DxilSignatureElement &SE = sig.GetElement(SEIdx); - bool isOutput = sig.IsOutput(); + DxilSignatureElement &SE = Sig.GetElement(SEIdx); + bool IsOutput = Sig.IsOutput(); - unsigned col = ValidateSignatureRowCol(I, SE, rowVal, colVal, Status, ValCtx); + unsigned Col = ValidateSignatureRowCol(I, SE, RowVal, ColVal, Status, ValCtx); - if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { - unsigned mask = Status.OutputPositionMask[SE.GetOutputStream()]; - mask |= 1 << col; + if (IsOutput && SE.GetSemantic()->GetKind() == 
DXIL::SemanticKind::Position) { + unsigned Mask = Status.OutputPositionMask[SE.GetOutputStream()]; + Mask |= 1 << Col; if (SE.GetOutputStream() < DXIL::kNumOutputStreams) - Status.OutputPositionMask[SE.GetOutputStream()] = mask; + Status.OutputPositionMask[SE.GetOutputStream()] = Mask; } return &SE; } @@ -183,9 +183,9 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, return RP; } -static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, +static DXIL::SamplerKind GetSamplerKind(Value *SamplerHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(samplerHandle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(SamplerHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::Sampler) { // must be sampler. @@ -200,14 +200,14 @@ static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, } static DXIL::ResourceKind -GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, +GetResourceKindAndCompTy(Value *Handle, DXIL::ComponentType &CompTy, DXIL::ResourceClass &ResClass, ValidationContext &ValCtx) { CompTy = DXIL::ComponentType::Invalid; ResClass = DXIL::ResourceClass::Invalid; // TODO: validate ROV is used only in PS. - DxilResourceProperties RP = GetResourceFromHandle(handle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(Handle, ValCtx); ResClass = RP.getResourceClass(); switch (ResClass) { @@ -230,19 +230,19 @@ GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, return RP.getResourceKind(); } -DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, - std::deque &offsets) { +DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &TypeSys, + std::deque &Offsets) { unsigned CurIdx = 1; - unsigned LastIdx = offsets.size() - 1; + unsigned LastIdx = Offsets.size() - 1; DxilStructAnnotation *StructAnnot = nullptr; - for (; CurIdx < offsets.size(); ++CurIdx) { + for (; CurIdx < Offsets.size(); ++CurIdx) { if (const StructType *EltST = dyn_cast(Ty)) { - if (DxilStructAnnotation *EltAnnot = typeSys.GetStructAnnotation(EltST)) { + if (DxilStructAnnotation *EltAnnot = TypeSys.GetStructAnnotation(EltST)) { StructAnnot = EltAnnot; - Ty = EltST->getElementType(offsets[CurIdx]); + Ty = EltST->getElementType(Offsets[CurIdx]); if (CurIdx == LastIdx) { - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } else { return nullptr; @@ -252,16 +252,16 @@ DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, StructAnnot = nullptr; } else { if (StructAnnot) - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } return nullptr; } -DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { - auto it = ResPropMap.find(resVal); - if (it != ResPropMap.end()) { - return it->second; +DxilResourceProperties ValidationContext::GetResourceFromVal(Value *ResVal) { + auto It = ResPropMap.find(ResVal); + if (It != ResPropMap.end()) { + return It->second; } else { DxilResourceProperties RP; return RP; @@ -269,34 +269,34 @@ DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { } struct ResRetUsage { - bool x; - bool y; - bool z; - bool w; - bool status; - ResRetUsage() : x(false), y(false), z(false), w(false), status(false) {} + bool X; + bool Y; + bool Z; + bool W; + bool Status; + ResRetUsage() : X(false), Y(false), Z(false), W(false), Status(false) {} }; -static void 
CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, +static void CollectGetDimResRetUsage(ResRetUsage &Usage, Instruction *ResRet, ValidationContext &ValCtx) { for (User *U : ResRet->users()) { if (ExtractValueInst *EVI = dyn_cast(U)) { - for (unsigned idx : EVI->getIndices()) { - switch (idx) { + for (unsigned Idx : EVI->getIndices()) { + switch (Idx) { case 0: - usage.x = true; + Usage.X = true; break; case 1: - usage.y = true; + Usage.Y = true; break; case 2: - usage.z = true; + Usage.Z = true; break; case 3: - usage.w = true; + Usage.W = true; break; case DXIL::kResRetStatusIndex: - usage.status = true; + Usage.Status = true; break; default: // Emit index out of bound. @@ -306,7 +306,7 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } } else if (PHINode *PHI = dyn_cast(U)) { - CollectGetDimResRetUsage(usage, PHI, ValCtx); + CollectGetDimResRetUsage(Usage, PHI, ValCtx); } else { Instruction *User = cast(U); ValCtx.EmitInstrError(User, ValidationRule::InstrDxilStructUser); @@ -314,18 +314,18 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } -static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef coords, +static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { - const unsigned kMaxNumCoords = 4; - unsigned numCoords = DxilResource::GetNumCoords(resKind); - for (unsigned i = 0; i < kMaxNumCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + const unsigned KMaxNumCoords = 4; + unsigned NumCoords = DxilResource::GetNumCoords(ResKind); + for (unsigned I = 0; I < KMaxNumCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -334,18 +334,18 @@ static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, } static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, - DXIL::ResourceKind resKind, - ArrayRef coords, + DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { const unsigned kMaxNumDimCoords = 3; - unsigned numCoords = DxilResource::GetNumDimensionsForCalcLOD(resKind); - for (unsigned i = 0; i < kMaxNumDimCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + unsigned NumCoords = DxilResource::GetNumDimensionsForCalcLOD(ResKind); + for (unsigned I = 0; I < kMaxNumDimCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -353,21 +353,21 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, } } -static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef offsets, +static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Offsets, ValidationContext &ValCtx) { const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - unsigned numOffsets = DxilResource::GetNumOffsets(resKind); - bool hasOffset = !isa(offsets[0]); + unsigned NumOffsets = DxilResource::GetNumOffsets(ResKind); + bool HasOffset = !isa(Offsets[0]); - auto validateOffset = [&](Value *offset) { + auto ValidateOffset = [&](Value *Offset) { // 6.7 Advanced Textures allow programmable offsets 
if (pSM->IsSM67Plus()) return; - if (ConstantInt *cOffset = dyn_cast(offset)) { - int offset = cOffset->getValue().getSExtValue(); - if (offset > 7 || offset < -8) { + if (ConstantInt *cOffset = dyn_cast(Offset)) { + int Offset = cOffset->getValue().getSExtValue(); + if (Offset > 7 || Offset < -8) { ValCtx.EmitInstrError(CI, ValidationRule::InstrTextureOffset); } } else { @@ -375,20 +375,20 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, } }; - if (hasOffset) { - validateOffset(offsets[0]); + if (HasOffset) { + ValidateOffset(Offsets[0]); } - for (unsigned i = 1; i < offsets.size(); i++) { - if (i < numOffsets) { - if (hasOffset) { - if (isa(offsets[i])) + for (unsigned I = 1; I < Offsets.size(); I++) { + if (I < NumOffsets) { + if (HasOffset) { + if (isa(Offsets[I])) ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); else - validateOffset(offsets[i]); + ValidateOffset(Offsets[I]); } } else { - if (!isa(offsets[i])) { + if (!isa(Offsets[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } @@ -405,53 +405,53 @@ static void ValidateDerivativeOp(CallInst *CI, ValidationContext &ValCtx) { {"Derivatives in CS/MS/AS", "Shader Model 6.6+"}); } -static void ValidateSampleInst(CallInst *CI, Value *srvHandle, - Value *samplerHandle, ArrayRef coords, - ArrayRef offsets, bool IsSampleC, +static void ValidateSampleInst(CallInst *CI, Value *SrvHandle, + Value *SamplerHandle, ArrayRef Coords, + ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); - bool isSampleCompTy = compTy == DXIL::ComponentType::F32; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::F16; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF16; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF16; + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); + bool IsSampleCompTy = CompTy == DXIL::ComponentType::F32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::F16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF16; const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); if (pSM->IsSM67Plus() && !IsSampleC) { - isSampleCompTy |= compTy == DXIL::ComponentType::I16; - isSampleCompTy |= compTy == DXIL::ComponentType::U16; - isSampleCompTy |= compTy == DXIL::ComponentType::I32; - isSampleCompTy |= compTy == DXIL::ComponentType::U32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I32; + IsSampleCompTy |= CompTy == 
DXIL::ComponentType::U32; } - if (!isSampleCompTy) { + if (!IsSampleCompTy) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleCompType); } - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); } - ValidationRule rule = ValidationRule::InstrResourceKindForSample; + ValidationRule Rule = ValidationRule::InstrResourceKindForSample; if (IsSampleC) { - rule = ValidationRule::InstrResourceKindForSampleC; + Rule = ValidationRule::InstrResourceKindForSampleC; } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -461,64 +461,64 @@ static void ValidateSampleInst(CallInst *CI, Value *srvHandle, break; case DXIL::ResourceKind::Texture3D: if (IsSampleC) { - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); } break; default: - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. - ValidateResourceOffset(CI, resKind, offsets, ValCtx); + ValidateResourceOffset(CI, ResKind, Offsets, ValCtx); } -static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, - ArrayRef coords, ArrayRef offsets, +static void ValidateGather(CallInst *CI, Value *SrvHandle, Value *SamplerHandle, + ArrayRef Coords, ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture2D: case DXIL::ResourceKind::Texture2DArray: { - bool hasOffset = !isa(offsets[0]); - if (hasOffset) { - if (isa(offsets[1])) { + bool HasOffset = !isa(Offsets[0]); + if (HasOffset) { + if (isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); } } } break; case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: { - if (!isa(offsets[0])) { + if (!isa(Offsets[0])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } - if (!isa(offsets[1])) { + if (!isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } break; @@ -529,21 +529,21 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, } } -static unsigned StoreValueToMask(ArrayRef vals) { - unsigned mask = 0; - for (unsigned i = 0; i < 4; i++) { - if (!isa(vals[i])) { - mask |= 1 << i; +static unsigned StoreValueToMask(ArrayRef Vals) { + unsigned Mask = 0; + for (unsigned I = 0; I < 4; I++) { + if (!isa(Vals[I])) { + Mask |= 1 << I; } } - return mask; + return Mask; } -static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(cbHandle, ValCtx); +static int GetCBufSize(Value *CbHandle, ValidationContext &ValCtx) { + DxilResourceProperties RP = GetResourceFromHandle(CbHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::CBuffer) { - ValCtx.EmitInstrError(cast(cbHandle), + ValCtx.EmitInstrError(cast(CbHandle), ValidationRule::InstrCBufferClassForCBufferHandle); return -1; } @@ -554,7 +554,7 @@ static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { // Make sure none of the handle arguments are undef / zero-initializer, // Also, do not accept any resource handles with invalid dxil resource // properties -void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { for (Value *op : CI->operands()) { @@ -563,13 +563,13 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, const Type *pNodeRecordHandleTy = ValCtx.DxilMod.GetOP()->GetNodeRecordHandleType(); - const Type *argTy = op->getType(); - if (argTy == pNodeHandleTy || argTy == pNodeRecordHandleTy || - argTy == pHandleTy) { + const Type *ArgTy = op->getType(); + if (ArgTy == pNodeHandleTy || ArgTy == pNodeRecordHandleTy || + ArgTy == pHandleTy) { if (isa(op) || isa(op)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); - } else if (argTy == pHandleTy) { + } else if (ArgTy == pHandleTy) { // GetResourceFromHandle will emit an error on an invalid handle GetResourceFromHandle(op, ValCtx); } @@ -577,10 +577,10 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, } } -void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgs(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // TODO: add case DXIL::OpCode::IndexNodeRecordHandle: case DXIL::OpCode::AnnotateHandle: @@ -591,12 +591,12 @@ void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, break; default: - ValidateHandleArgsForInstruction(CI, opcode, ValCtx); + ValidateHandleArgsForInstruction(CI, Opcode, ValCtx); break; } } -static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { +static unsigned GetNumVertices(DXIL::InputPrimitive InputPrimitive) { const unsigned 
InputPrimitiveVertexTab[] = { 0, // Undefined = 0, 1, // Point = 1, @@ -641,26 +641,26 @@ static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { 0, // LastEntry, }; - unsigned primitiveIdx = static_cast(inputPrimitive); - return InputPrimitiveVertexTab[primitiveIdx]; + unsigned PrimitiveIdx = static_cast(InputPrimitive); + return InputPrimitiveVertexTab[PrimitiveIdx]; } -static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { Function *F = CI->getParent()->getParent(); DxilModule &DM = ValCtx.DxilMod; - bool bIsPatchConstantFunc = false; + bool IsPatchConstantFunc = false; if (!DM.HasDxilEntryProps(F)) { - auto it = ValCtx.PatchConstantFuncMap.find(F); - if (it == ValCtx.PatchConstantFuncMap.end()) { + auto It = ValCtx.PatchConstantFuncMap.find(F); + if (It == ValCtx.PatchConstantFuncMap.end()) { // Missing entry props. ValCtx.EmitInstrError(CI, ValidationRule::InstrSignatureOperationNotInEntry); return; } // Use hull entry instead of patch constant function. - F = it->second.front(); - bIsPatchConstantFunc = true; + F = It->second.front(); + IsPatchConstantFunc = true; } if (!ValCtx.HasEntryStatus(F)) { return; @@ -668,67 +668,67 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); - DxilFunctionProps &props = EntryProps.props; + DxilFunctionProps &Props = EntryProps.props; DxilEntrySignature &S = EntryProps.sig; - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::LoadInput: { - Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); - ValidateSignatureAccess(CI, inputSig, inputID, row, col, Status, ValCtx); - - // Check vertexID in ps/vs. and none array input. - Value *vertexID = + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + ValidateSignatureAccess(CI, InputSig, InputId, Row, Col, Status, ValCtx); + + // Check VertexId in ps/vs. and none array input. + Value *VertexId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputVertexIDOpIdx); - bool usedVertexID = vertexID && !isa(vertexID); - if (props.IsVS() || props.IsPS()) { - if (usedVertexID) { - // use vertexID in VS/PS input. + bool UsedVertexId = VertexId && !isa(VertexId); + if (Props.IsVS() || Props.IsPS()) { + if (UsedVertexId) { + // Use VertexId in VS/PS input. 
ValCtx.EmitInstrError(CI, ValidationRule::SmOperand); return; } } else { - if (ConstantInt *cVertexID = dyn_cast(vertexID)) { - int immVertexID = cVertexID->getValue().getLimitedValue(); - if (cVertexID->getValue().isNegative()) { - immVertexID = cVertexID->getValue().getSExtValue(); + if (ConstantInt *cVertexId = dyn_cast(VertexId)) { + int ImmVertexId = cVertexId->getValue().getLimitedValue(); + if (cVertexId->getValue().isNegative()) { + ImmVertexId = cVertexId->getValue().getSExtValue(); } - const int low = 0; - int high = 0; - if (props.IsGS()) { - DXIL::InputPrimitive inputPrimitive = - props.ShaderProps.GS.inputPrimitive; - high = GetNumVertices(inputPrimitive); - } else if (props.IsDS()) { - high = props.ShaderProps.DS.inputControlPoints; - } else if (props.IsHS()) { - high = props.ShaderProps.HS.inputControlPoints; + const int Low = 0; + int High = 0; + if (Props.IsGS()) { + DXIL::InputPrimitive InputPrimitive = + Props.ShaderProps.GS.inputPrimitive; + High = GetNumVertices(InputPrimitive); + } else if (Props.IsDS()) { + High = Props.ShaderProps.DS.inputControlPoints; + } else if (Props.IsHS()) { + High = Props.ShaderProps.HS.inputControlPoints; } else { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadInput", "VS/HS/DS/GS/PS"}); } - if (immVertexID < low || immVertexID >= high) { - std::string range = std::to_string(low) + "~" + std::to_string(high); + if (ImmVertexId < Low || ImmVertexId >= High) { + std::string Range = std::to_string(Low) + "~" + std::to_string(High); ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"VertexID", range, std::to_string(immVertexID)}); + {"VertexID", Range, std::to_string(ImmVertexId)}); } } } } break; case DXIL::OpCode::DomainLocation: { - Value *colValue = + Value *ColValue = CI->getArgOperand(DXIL::OperandIndex::kDomainLocationColOpIdx); - if (!isa(colValue)) { - // col must be const + if (!isa(ColValue)) { + // Col must be const ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Col", "DomainLocation"}); } else { - unsigned col = cast(colValue)->getLimitedValue(); - if (col >= Status.domainLocSize) { + unsigned Col = cast(ColValue)->getLimitedValue(); + if (Col >= Status.domainLocSize) { ValCtx.EmitInstrError(CI, ValidationRule::SmDomainLocationIdxOOB); } } @@ -736,60 +736,60 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::StoreOutput: case DXIL::OpCode::StoreVertexOutput: case DXIL::OpCode::StorePrimitiveOutput: { - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = opcode == DXIL::OpCode::StorePrimitiveOutput + DxilSignature &OutputSig = Opcode == DXIL::OpCode::StorePrimitiveOutput ? S.PatchConstOrPrimSignature : S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::OutputControlPointID: { // Only used in hull shader. - Function *func = CI->getParent()->getParent(); + Function *Func = CI->getParent()->getParent(); // Make sure this is inside hs shader entry function. 
- if (!(props.IsHS() && F == func)) { + if (!(Props.IsHS() && F == Func)) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"OutputControlPointID", "hull function"}); } } break; case DXIL::OpCode::LoadOutputControlPoint: { // Only used in patch constant function. - Function *func = CI->getParent()->getParent(); - if (ValCtx.entryFuncCallSet.count(func) > 0) { + Function *Func = CI->getParent()->getParent(); + if (ValCtx.entryFuncCallSet.count(Func) > 0) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadOutputControlPoint", "PatchConstant function"}); } - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + DxilSignature &OutputSig = S.OutputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::StorePatchConstant: { // Only used in patch constant function. - Function *func = CI->getParent()->getParent(); - if (!bIsPatchConstantFunc) { + Function *Func = CI->getParent()->getParent(); + if (!IsPatchConstantFunc) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"StorePatchConstant", "PatchConstant function"}); } else { - auto &hullShaders = ValCtx.PatchConstantFuncMap[func]; - for (Function *F : hullShaders) { + auto &HullShaders = ValCtx.PatchConstantFuncMap[Func]; + for (Function *F : HullShaders) { EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); DxilEntrySignature &S = EntryProps.sig; - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.PatchConstOrPrimSignature; - Value *row = + DxilSignature &OutputSig = S.PatchConstOrPrimSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } } @@ -807,12 +807,12 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::EvalSampleIndex: case DXIL::OpCode::EvalSnapped: { // Eval* share same operand index with load input. 
- Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, inputID, row, col, Status, ValCtx); + CI, InputSig, InputId, Row, Col, Status, ValCtx); if (pSE) { switch (pSE->GetInterpolationMode()->GetKind()) { case DXIL::InterpolationMode::Linear: @@ -836,11 +836,11 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::AttributeAtVertex: { Value *Attribute = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, Attribute, row, col, Status, ValCtx); + CI, InputSig, Attribute, Row, Col, Status, ValCtx); if (pSE && pSE->GetInterpolationMode()->GetKind() != hlsl::InterpolationMode::Kind::Constant) { ValCtx.EmitInstrFormatError( @@ -851,35 +851,35 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::CutStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::EmitStream: { - if (props.IsGS()) { - auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + if (Props.IsGS()) { + auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - Value *streamID = + Value *StreamId = CI->getArgOperand(DXIL::OperandIndex::kStreamEmitCutIDOpIdx); - if (ConstantInt *cStreamID = dyn_cast(streamID)) { - int immStreamID = cStreamID->getValue().getLimitedValue(); - if (cStreamID->getValue().isNegative() || immStreamID >= 4) { + if (ConstantInt *cStreamId = dyn_cast(StreamId)) { + int ImmStreamId = cStreamId->getValue().getLimitedValue(); + if (cStreamId->getValue().isNegative() || ImmStreamId >= 4) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", "0~4", std::to_string(immStreamID)}); + {"StreamID", "0~4", std::to_string(ImmStreamId)}); } else { - unsigned immMask = 1 << immStreamID; - if ((streamMask & immMask) == 0) { - std::string range; - for (unsigned i = 0; i < 4; i++) { - if (streamMask & (1 << i)) { - range += std::to_string(i) + " "; + unsigned ImmMask = 1 << ImmStreamId; + if ((StreamMask & ImmMask) == 0) { + std::string Range; + for (unsigned I = 0; I < 4; I++) { + if (StreamMask & (1 << I)) { + Range += std::to_string(I) + " "; } } ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", range, std::to_string(immStreamID)}); + {"StreamID", 
Range, std::to_string(ImmStreamId)}); } } @@ -893,25 +893,25 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::EmitIndices: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"EmitIndices", "Mesh shader"}); } } break; case DXIL::OpCode::SetMeshOutputCounts: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"SetMeshOutputCounts", "Mesh shader"}); } } break; case DXIL::OpCode::GetMeshPayload: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"GetMeshPayload", "Mesh shader"}); } } break; case DXIL::OpCode::DispatchMesh: { - if (!props.IsAS()) { + if (!Props.IsAS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"DispatchMesh", "Amplification shader"}); } @@ -925,9 +925,9 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } -static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: { DxilInst_Asin I(CI); @@ -973,77 +973,77 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation -static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode opcode, - ConstantInt *mask, unsigned stValMask, - bool isTyped, ValidationContext &ValCtx) { - if (!mask) { +static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, + ConstantInt *Mask, unsigned StValMask, + bool IsTyped, ValidationContext &ValCtx) { + if (!Mask) { // Mask for buffer store should be immediate. ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, - {"Mask", hlsl::OP::GetOpCodeName(opcode)}); + {"Mask", hlsl::OP::GetOpCodeName(Opcode)}); return false; } - unsigned uMask = mask->getLimitedValue(); - if (isTyped && uMask != 0xf) { + unsigned UMask = Mask->getLimitedValue(); + if (IsTyped && UMask != 0xf) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskForTypedUAVStore); } // write mask must be contiguous (.x .xy .xyz or .xyzw) - if (!((uMask == 0xf) || (uMask == 0x7) || (uMask == 0x3) || (uMask == 0x1))) { + if (!((UMask == 0xf) || (UMask == 0x7) || (UMask == 0x3) || (UMask == 0x1))) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskGapForUAV); } - // If a bit is set in the uMask (expected values) that isn't set in stValMask + // If a bit is set in the UMask (expected values) that isn't set in StValMask // (user provided values) then the user failed to define some of the output // values. 
- if (uMask & ~stValMask) + if (UMask & ~StValMask) ValCtx.EmitInstrError(I, ValidationRule::InstrUndefinedValueForUAVStore); - else if (uMask != stValMask) + else if (UMask != StValMask) ValCtx.EmitInstrFormatError( I, ValidationRule::InstrWriteMaskMatchValueForUAVStore, - {std::to_string(uMask), std::to_string(stValMask)}); + {std::to_string(UMask), std::to_string(StValMask)}); return true; } -static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::GetDimensions: { - DxilInst_GetDimensions getDim(CI); - Value *handle = getDim.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); + DxilInst_GetDimensions GetDim(CI); + Value *Handle = GetDim.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); // Check the result component use. - ResRetUsage usage; - CollectGetDimResRetUsage(usage, CI, ValCtx); + ResRetUsage Usage; + CollectGetDimResRetUsage(Usage, CI, ValCtx); // Mip level only for texture. - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: - if (usage.y) { + if (Usage.Y) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"y", "Texture1D"}); } - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1D"}); } break; case DXIL::ResourceKind::Texture1DArray: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1DArray"}); } break; case DXIL::ResourceKind::Texture2D: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2D"}); @@ -1052,7 +1052,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture2DArray: break; case DXIL::ResourceKind::Texture2DMS: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2DMS"}); @@ -1063,7 +1063,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture3D: break; case DXIL::ResourceKind::TextureCube: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "TextureCube"}); @@ -1075,12 +1075,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: { - Value *mip = getDim.get_mipLevel(); - if (!isa(mip)) { + Value *Mip = GetDim.get_mipLevel(); + if (!isa(Mip)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrMipLevelForGetDimension); } - if (resKind != DXIL::ResourceKind::Invalid) { - if (usage.y || usage.z || usage.w) { + if (ResKind != DXIL::ResourceKind::Invalid) { + if (Usage.Y || Usage.Z || Usage.W) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); @@ -1092,38 +1092,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; } - if (usage.status) { + if (Usage.Status) { ValCtx.EmitInstrFormatError( CI, 
ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); } } break; case DXIL::OpCode::CalculateLOD: { - DxilInst_CalculateLOD lod(CI); - Value *samplerHandle = lod.get_sampler(); - DXIL::SamplerKind samplerKind = GetSamplerKind(samplerHandle, ValCtx); - if (samplerKind != DXIL::SamplerKind::Default) { + DxilInst_CalculateLOD LOD(CI); + Value *SamplerHandle = LOD.get_sampler(); + DXIL::SamplerKind SamplerKind = GetSamplerKind(SamplerHandle, ValCtx); + if (SamplerKind != DXIL::SamplerKind::Default) { // After SM68, Comparison is supported. if (!ValCtx.DxilMod.GetShaderModel()->IsSM68Plus() || - samplerKind != DXIL::SamplerKind::Comparison) + SamplerKind != DXIL::SamplerKind::Comparison) ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForLOD); } - Value *handle = lod.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + Value *Handle = LOD.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource. ValidateCalcLODResourceDimensionCoord( - CI, resKind, {lod.get_coord0(), lod.get_coord1(), lod.get_coord2()}, + CI, ResKind, {LOD.get_coord0(), LOD.get_coord1(), LOD.get_coord2()}, ValCtx); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1140,67 +1140,67 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::TextureGather: { - DxilInst_TextureGather gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGather Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::TextureGatherCmp: { - DxilInst_TextureGatherCmp gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGatherCmp Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::Sample: { - DxilInst_Sample sample(CI); + DxilInst_Sample Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case 
DXIL::OpCode::SampleCmp: { - DxilInst_SampleCmp sample(CI); + DxilInst_SampleCmp Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpLevel: { // sampler must be comparison mode. - DxilInst_SampleCmpLevel sample(CI); + DxilInst_SampleCmpLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleCmpLevelZero: { // sampler must be comparison mode. - DxilInst_SampleCmpLevelZero sample(CI); + DxilInst_SampleCmpLevelZero Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleBias: { - DxilInst_SampleBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1210,19 +1210,19 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpBias: { - DxilInst_SampleCmpBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleCmpBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || 
FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1232,38 +1232,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleGrad: { - DxilInst_SampleGrad sample(CI); + DxilInst_SampleGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::SampleCmpGrad: { - DxilInst_SampleCmpGrad sample(CI); + DxilInst_SampleCmpGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleLevel: { - DxilInst_SampleLevel sample(CI); + DxilInst_SampleLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::CheckAccessFullyMapped: { @@ -1273,53 +1273,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); - bool isLegal = EVI->getNumIndices() == 1 && + bool IsLegal = EVI->getNumIndices() == 1 && EVI->getIndices()[0] == DXIL::kResRetStatusIndex && ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); - if (!isLegal) { + if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } } } break; case DXIL::OpCode::BufferStore: { - DxilInst_BufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_BufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != 
DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, - resKind == DXIL::ResourceKind::TypedBuffer || - resKind == DXIL::ResourceKind::TBuffer, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, + ResKind == DXIL::ResourceKind::TypedBuffer || + ResKind == DXIL::ResourceKind::TBuffer, ValCtx)) return; - Value *offset = bufSt.get_coord1(); + Value *Offset = BufSt.get_coord1(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1332,26 +1332,26 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureStore: { - DxilInst_TextureStore texSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texSt.get_srv(), compTy, resClass, ValCtx); + DxilInst_TextureStore TexSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexSt.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(texSt.get_mask()); - unsigned stValMask = - StoreValueToMask({texSt.get_value0(), texSt.get_value1(), - texSt.get_value2(), texSt.get_value3()}); + ConstantInt *Mask = dyn_cast(TexSt.get_mask()); + unsigned StValMask = + StoreValueToMask({TexSt.get_value0(), TexSt.get_value1(), + TexSt.get_value2(), TexSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, true /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, true /*IsTyped*/, ValCtx)) return; - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1367,30 +1367,30 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::BufferLoad: { - DxilInst_BufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + DxilInst_BufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, 
ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_wot(); + Value *Offset = BufLd.get_wot(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1403,33 +1403,33 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureLoad: { - DxilInst_TextureLoad texLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texLd.get_srv(), compTy, resClass, ValCtx); - - Value *mipLevel = texLd.get_mipLevelOrSampleCount(); - - if (resClass == DXIL::ResourceClass::UAV) { - bool noOffset = isa(texLd.get_offset0()); - noOffset &= isa(texLd.get_offset1()); - noOffset &= isa(texLd.get_offset2()); - if (!noOffset) { + DxilInst_TextureLoad TexLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexLd.get_srv(), CompTy, ResClass, ValCtx); + + Value *MipLevel = TexLd.get_mipLevelOrSampleCount(); + + if (ResClass == DXIL::ResourceClass::UAV) { + bool NoOffset = isa(TexLd.get_offset0()); + NoOffset &= isa(TexLd.get_offset1()); + NoOffset &= isa(TexLd.get_offset2()); + if (!NoOffset) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOffsetOnUAVLoad); } - if (!isa(mipLevel)) { - if (resKind != DXIL::ResourceKind::Texture2DMS && - resKind != DXIL::ResourceKind::Texture2DMSArray) + if (!isa(MipLevel)) { + if (ResKind != DXIL::ResourceKind::Texture2DMS && + ResKind != DXIL::ResourceKind::Texture2DMSArray) ValCtx.EmitInstrError(CI, ValidationRule::InstrMipOnUAVLoad); } } else { - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1438,7 +1438,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, break; case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: { - if (isa(mipLevel)) { + if (isa(MipLevel)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleIndexForLoad2DMS); } } break; @@ -1449,28 +1449,28 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateResourceOffset( - CI, resKind, - {texLd.get_offset0(), texLd.get_offset1(), texLd.get_offset2()}, + CI, ResKind, + {TexLd.get_offset0(), TexLd.get_offset1(), TexLd.get_offset2()}, ValCtx); } break; case DXIL::OpCode::CBufferLoad: { DxilInst_CBufferLoad CBLoad(CI); - Value *regIndex = CBLoad.get_byteOffset(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue(); - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_byteOffset(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue(); + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; case 
DXIL::OpCode::CBufferLoadLegacy: { DxilInst_CBufferLoadLegacy CBLoad(CI); - Value *regIndex = CBLoad.get_regIndex(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue() * 16; // 16 bytes align - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_regIndex(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue() * 16; // 16 bytes align + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } @@ -1483,35 +1483,35 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); } } - DxilInst_RawBufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); + DxilInst_RawBufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_elementOffset(); - Value *align = bufLd.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { + Value *Offset = BufLd.get_elementOffset(); + Value *Align = BufLd.get_alignment(); + unsigned AlignSize = 0; + if (!isa(Align)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } else { - alignSize = bufLd.get_alignment_val(); + AlignSize = BufLd.get_alignment_val(); } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1530,43 +1530,43 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); } } - DxilInst_RawBufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_RawBufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, false /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; - Value *offset = 
bufSt.get_elementOffset(); - Value *align = bufSt.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { + Value *Offset = BufSt.get_elementOffset(); + Value *Align = BufSt.get_alignment(); + unsigned AlignSize = 0; + if (!isa(Align)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } else { - alignSize = bufSt.get_alignment_val(); + AlignSize = BufSt.get_alignment_val(); } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1578,9 +1578,9 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::TraceRay: { - DxilInst_TraceRay traceRay(CI); - Value *hdl = traceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(hdl); + DxilInst_TraceRay TraceRay(CI); + Value *Hdl = TraceRay.get_AccelerationStructure(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); return; @@ -1595,12 +1595,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } static void ValidateBarrierFlagArg(ValidationContext &ValCtx, CallInst *CI, - Value *Arg, unsigned validMask, - StringRef flagName, StringRef opName) { + Value *Arg, unsigned ValidMask, + StringRef FlagName, StringRef OpName) { if (ConstantInt *CArg = dyn_cast(Arg)) { - if ((CArg->getLimitedValue() & (uint32_t)(~validMask)) != 0) { + if ((CArg->getLimitedValue() & (uint32_t)(~ValidMask)) != 0) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrBarrierFlagInvalid, - {flagName, opName}); + {FlagName, OpName}); } } else { ValCtx.EmitInstrError(CI, @@ -1622,35 +1622,35 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } static void ValidateDxilOperationCallInProfile(CallInst *CI, - DXIL::OpCode opcode, + DXIL::OpCode Opcode, const ShaderModel *pSM, ValidationContext &ValCtx) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = pSM ? 
pSM->GetKind() : DXIL::ShaderKind::Invalid; llvm::Function *F = CI->getParent()->getParent(); - DXIL::NodeLaunchType nodeLaunchType = DXIL::NodeLaunchType::Invalid; - if (DXIL::ShaderKind::Library == shaderKind) { + DXIL::NodeLaunchType NodeLaunchType = DXIL::NodeLaunchType::Invalid; + if (DXIL::ShaderKind::Library == ShaderKind) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DxilEntryProps &entryProps = ValCtx.DxilMod.GetDxilEntryProps(F); - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind == DXIL::ShaderKind::Node) - nodeLaunchType = entryProps.props.Node.LaunchType; + DxilEntryProps &EntryProps = ValCtx.DxilMod.GetDxilEntryProps(F); + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; + if (ShaderKind == DXIL::ShaderKind::Node) + NodeLaunchType = EntryProps.props.Node.LaunchType; } else if (ValCtx.DxilMod.IsPatchConstantShader(F)) - shaderKind = DXIL::ShaderKind::Hull; + ShaderKind = DXIL::ShaderKind::Hull; } // These shader models are treated like compute - bool isCSLike = shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Node; + bool IsCSLike = ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Node; // Is called from a library function - bool isLibFunc = shaderKind == DXIL::ShaderKind::Library; + bool IsLibFunc = ShaderKind == DXIL::ShaderKind::Library; - ValidateHandleArgs(CI, opcode, ValCtx); + ValidateHandleArgs(CI, Opcode, ValCtx); - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: case DXIL::OpCode::Acos: @@ -1659,7 +1659,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::DerivFineY: case DXIL::OpCode::DerivCoarseX: case DXIL::OpCode::DerivCoarseY: - ValidateImmOperandForMathDxilOp(CI, opcode, ValCtx); + ValidateImmOperandForMathDxilOp(CI, Opcode, ValCtx); break; // Resource validation. case DXIL::OpCode::GetDimensions: @@ -1684,7 +1684,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: - ValidateResourceDxilOp(CI, opcode, ValCtx); + ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. case DXIL::OpCode::LoadInput: @@ -1705,13 +1705,13 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::EmitStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::CutStream: - ValidateSignatureDxilOp(CI, opcode, ValCtx); + ValidateSignatureDxilOp(CI, Opcode, ValCtx); break; // Special.
case DXIL::OpCode::AllocateRayQuery: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); - if (!llvm::isa(constRayFlag)) { + llvm::Value *ConstRayFlag = CI->getOperand(1); + if (!llvm::isa(ConstRayFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQueryFlagsAreConst); } @@ -1719,9 +1719,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::AllocateRayQuery2: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); + llvm::Value *ConstRayFlag = CI->getOperand(1); llvm::Value *RayQueryFlag = CI->getOperand(2); - if (!llvm::isa(constRayFlag) || + if (!llvm::isa(ConstRayFlag) || !llvm::isa(RayQueryFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQuery2FlagsAreConst); @@ -1730,7 +1730,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, // When the ForceOMM2State ConstRayFlag is given as an argument to // a RayQuery object, AllowOpacityMicromaps is expected // as a RayQueryFlag argument - llvm::ConstantInt *Arg1 = llvm::cast(constRayFlag); + llvm::ConstantInt *Arg1 = llvm::cast(ConstRayFlag); llvm::ConstantInt *Arg2 = llvm::cast(RayQueryFlag); if ((Arg1->getValue().getSExtValue() & (unsigned)DXIL::RayFlag::ForceOMM2State) && @@ -1744,9 +1744,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::BufferUpdateCounter: { - DxilInst_BufferUpdateCounter updateCounter(CI); - Value *handle = updateCounter.get_uav(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(handle); + DxilInst_BufferUpdateCounter UpdateCounter(CI); + Value *Handle = UpdateCounter.get_uav(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); if (!RP.isUAV()) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBufferUpdateCounterOnUAV); @@ -1761,20 +1761,20 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, CI, ValidationRule::InstrBufferUpdateCounterOnResHasCounter); } - Value *inc = updateCounter.get_inc(); - if (ConstantInt *cInc = dyn_cast(inc)) { - bool isInc = cInc->getLimitedValue() == 1; + Value *Inc = UpdateCounter.get_inc(); + if (ConstantInt *cInc = dyn_cast(Inc)) { + bool IsInc = cInc->getLimitedValue() == 1; if (!ValCtx.isLibProfile) { - auto it = ValCtx.HandleResIndexMap.find(handle); - if (it != ValCtx.HandleResIndexMap.end()) { - unsigned resIndex = it->second; - if (ValCtx.UavCounterIncMap.count(resIndex)) { - if (isInc != ValCtx.UavCounterIncMap[resIndex]) { + auto It = ValCtx.HandleResIndexMap.find(Handle); + if (It != ValCtx.HandleResIndexMap.end()) { + unsigned ResIndex = It->second; + if (ValCtx.UavCounterIncMap.count(ResIndex)) { + if (IsInc != ValCtx.UavCounterIncMap[ResIndex]) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOnlyOneAllocConsume); } } else { - ValCtx.UavCounterIncMap[resIndex] = isInc; + ValCtx.UavCounterIncMap[ResIndex] = IsInc; } } @@ -1789,35 +1789,35 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } break; case DXIL::OpCode::Barrier: { - DxilInst_Barrier barrier(CI); - Value *mode = barrier.get_barrierMode(); - ConstantInt *cMode = dyn_cast(mode); - if (!cMode) { + DxilInst_Barrier Barrier(CI); + Value *Mode = Barrier.get_barrierMode(); + ConstantInt *CMode = dyn_cast(Mode); + if (!CMode) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Mode", "Barrier"}); return; } - const unsigned uglobal = + const unsigned Uglobal = static_cast(DXIL::BarrierMode::UAVFenceGlobal); - const unsigned g = static_cast(DXIL::BarrierMode::TGSMFence); 
- const unsigned ut = + const unsigned G = static_cast(DXIL::BarrierMode::TGSMFence); + const unsigned Ut = static_cast(DXIL::BarrierMode::UAVFenceThreadGroup); - unsigned barrierMode = cMode->getLimitedValue(); + unsigned BarrierMode = CMode->getLimitedValue(); - if (isCSLike || isLibFunc) { - bool bHasUGlobal = barrierMode & uglobal; - bool bHasGroup = barrierMode & g; - bool bHasUGroup = barrierMode & ut; - if (bHasUGlobal && bHasUGroup) { + if (IsCSLike || IsLibFunc) { + bool HasUGlobal = BarrierMode & Uglobal; + bool HasGroup = BarrierMode & G; + bool HasUGroup = BarrierMode & Ut; + if (HasUGlobal && HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeUselessUGroup); } - if (!bHasUGlobal && !bHasGroup && !bHasUGroup) { + if (!HasUGlobal && !HasGroup && !HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); } } else { - if (uglobal != barrierMode) { + if (Uglobal != BarrierMode) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } @@ -1831,28 +1831,28 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, "semantic", "BarrierByMemoryType"); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; case DXIL::OpCode::BarrierByNodeRecordHandle: case DXIL::OpCode::BarrierByMemoryHandle: { - std::string opName = opcode == DXIL::OpCode::BarrierByNodeRecordHandle + std::string OpName = Opcode == DXIL::OpCode::BarrierByNodeRecordHandle ? 
"barrierByNodeRecordHandle" : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", opName); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + "semantic", OpName); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; @@ -1864,7 +1864,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { - Type *pOverloadType = OP::GetOverloadType(opcode, CI->getCalledFunction()); + Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); if ((pOverloadType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, @@ -1890,73 +1890,73 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::ThreadId: // SV_DispatchThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(nodeLaunchType)}); + {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::GroupId: // SV_GroupId - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"GroupId", "SV_GroupId", GetLaunchTypeStr(nodeLaunchType)}); + {"GroupId", "SV_GroupId", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::ThreadIdInGroup: // SV_GroupThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"ThreadIdInGroup", "SV_GroupThreadID", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::FlattenedThreadIdInGroup: // SV_GroupIndex - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"FlattenedThreadIdInGroup", "SV_GroupIndex", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; default: - // TODO: make sure every opcode is checked. + // TODO: make sure every Opcode is checked. 
// Skip opcodes that don't need special checks. break; } } static bool IsDxilFunction(llvm::Function *F) { - unsigned argSize = F->arg_size(); - if (argSize < 1) { + unsigned ArgSize = F->arg_size(); + if (ArgSize < 1) { // Cannot be a DXIL operation. return false; } @@ -1991,9 +1991,9 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - OP *hlslOP = ValCtx.DxilMod.GetOP(); - bool isDxilOp = OP::IsDxilOpFunc(F); - Type *voidTy = Type::getVoidTy(F->getContext()); + OP *HlslOP = ValCtx.DxilMod.GetOP(); + bool IsDxilOp = OP::IsDxilOpFunc(F); + Type *VoidTy = Type::getVoidTy(F->getContext()); for (User *user : F->users()) { CallInst *CI = dyn_cast<CallInst>(user); @@ -2004,32 +2004,32 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } // Skip call to external user defined function - if (!isDxilOp) + if (!IsDxilOp) continue; - Value *argOpcode = CI->getArgOperand(0); - ConstantInt *constOpcode = dyn_cast<ConstantInt>(argOpcode); - if (!constOpcode) { - // opcode not immediate; function body will validate this error. + Value *ArgOpcode = CI->getArgOperand(0); + ConstantInt *ConstOpcode = dyn_cast<ConstantInt>(ArgOpcode); + if (!ConstOpcode) { + // Opcode not immediate; function body will validate this error. continue; } - unsigned opcode = constOpcode->getLimitedValue(); - if (opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { - // invalid opcode; function body will validate this error. + unsigned Opcode = ConstOpcode->getLimitedValue(); + if (Opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { + // invalid Opcode; function body will validate this error. continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; // In some cases, no overloads are provided (void is exclusive to others) - Function *dxilFunc; - if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) { - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, voidTy); + Function *DxilFunc; + if (HlslOP->IsOverloadLegal(DxilOpcode, VoidTy)) { + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, VoidTy); } else { - Type *Ty = OP::GetOverloadType(dxilOpcode, CI->getCalledFunction()); + Type *Ty = OP::GetOverloadType(DxilOpcode, CI->getCalledFunction()); try { - if (!hlslOP->IsOverloadLegal(dxilOpcode, Ty)) { + if (!HlslOP->IsOverloadLegal(DxilOpcode, Ty)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } @@ -2037,75 +2037,75 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, Ty); } - if (!dxilFunc) { - // Cannot find dxilFunction based on opcode and type. + if (!DxilFunc) { + // Cannot find DxilFunction based on Opcode and type.
ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - if (dxilFunc->getFunctionType() != F->getFunctionType()) { + if (DxilFunc->getFunctionType() != F->getFunctionType()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrCallOload, - {dxilFunc->getName()}); + {DxilFunc->getName()}); continue; } unsigned major = pSM->GetMajor(); unsigned minor = pSM->GetMinor(); if (ValCtx.isLibProfile) { - Function *callingFunction = CI->getParent()->getParent(); + Function *CallingFunction = CI->getParent()->getParent(); DXIL::ShaderKind SK = DXIL::ShaderKind::Library; - if (ValCtx.DxilMod.HasDxilFunctionProps(callingFunction)) - SK = ValCtx.DxilMod.GetDxilFunctionProps(callingFunction).shaderKind; - else if (ValCtx.DxilMod.IsPatchConstantShader(callingFunction)) + if (ValCtx.DxilMod.HasDxilFunctionProps(CallingFunction)) + SK = ValCtx.DxilMod.GetDxilFunctionProps(CallingFunction).shaderKind; + else if (ValCtx.DxilMod.IsPatchConstantShader(CallingFunction)) SK = DXIL::ShaderKind::Hull; - if (!ValidateOpcodeInProfile(dxilOpcode, SK, major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, SK, major, minor)) { // Opcode not available in profile. // produces: "lib_6_3(ps)", or "lib_6_3(anyhit)" for shader types // Or: "lib_6_3(lib)" for library function - std::string shaderModel = pSM->GetName(); - shaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; + std::string ShaderModel = pSM->GetName(); + ShaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), shaderModel}); + {HlslOP->GetOpCodeName(DxilOpcode), ShaderModel}); continue; } } else { - if (!ValidateOpcodeInProfile(dxilOpcode, pSM->GetKind(), major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, pSM->GetKind(), major, minor)) { // Opcode not available in profile. ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), pSM->GetName()}); + {HlslOP->GetOpCodeName(DxilOpcode), pSM->GetName()}); continue; } } // Check more detail. - ValidateDxilOperationCallInProfile(CI, dxilOpcode, pSM, ValCtx); + ValidateDxilOperationCallInProfile(CI, DxilOpcode, pSM, ValCtx); } } /////////////////////////////////////////////////////////////////////////////// // Instruction validation functions. // -static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { - if (ST == hlslOP->GetBinaryWithCarryType()) +static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *HlslOP) { + if (ST == HlslOP->GetBinaryWithCarryType()) return true; - if (ST == hlslOP->GetBinaryWithTwoOutputsType()) + if (ST == HlslOP->GetBinaryWithTwoOutputsType()) return true; - if (ST == hlslOP->GetFourI32Type()) + if (ST == HlslOP->GetFourI32Type()) return true; - if (ST == hlslOP->GetFourI16Type()) + if (ST == HlslOP->GetFourI16Type()) return true; - if (ST == hlslOP->GetDimensionsType()) + if (ST == HlslOP->GetDimensionsType()) return true; - if (ST == hlslOP->GetHandleType()) + if (ST == HlslOP->GetHandleType()) return true; - if (ST == hlslOP->GetSamplePosType()) + if (ST == HlslOP->GetSamplePosType()) return true; - if (ST == hlslOP->GetSplitDoubleType()) + if (ST == HlslOP->GetSplitDoubleType()) return true; unsigned EltNum = ST->getNumElements(); @@ -2114,14 +2114,14 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { case 2: // Check if it's a native vector resret. 
if (EltTy->isVectorTy()) - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); LLVM_FALLTHROUGH; case 4: case 8: // 2 for doubles, 8 for halfs. - return ST == hlslOP->GetCBufferRetType(EltTy); + return ST == HlslOP->GetCBufferRetType(EltTy); break; case 5: - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); break; default: return false; @@ -2132,11 +2132,11 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { // inner type (UDT struct member) may be: [N dim array of]( UDT struct | scalar // ) scalar type may be: ( float(16|32|64) | int(16|32|64) ) static bool ValidateType(Type *Ty, ValidationContext &ValCtx, - bool bInner = false) { + bool IsInner = false) { DXASSERT_NOMSG(Ty != nullptr); if (Ty->isPointerTy()) { Type *EltTy = Ty->getPointerElementType(); - if (bInner || EltTy->isPointerTy()) { + if (IsInner || EltTy->isPointerTy()) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoPtrToPtr); return false; } @@ -2144,7 +2144,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } if (Ty->isArrayTy()) { Type *EltTy = Ty->getArrayElementType(); - if (!bInner && isa(EltTy)) { + if (!IsInner && isa(EltTy)) { // Outermost array should be converted to single-dim, // but arrays inside struct are allowed to be multi-dim ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoMultiDim); @@ -2155,7 +2155,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, Ty = EltTy; } if (Ty->isStructTy()) { - bool result = true; + bool Result = true; StructType *ST = cast(Ty); StringRef Name = ST->getName(); @@ -2163,28 +2163,28 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, // Allow handle type. if (ValCtx.HandleTy == Ty) return true; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); - if (IsDxilBuiltinStructType(ST, hlslOP)) { + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); - result = false; + Result = false; } ValCtx.EmitTypeError(Ty, ValidationRule::DeclDxilNsReserved); - result = false; + Result = false; } for (auto e : ST->elements()) { - if (!ValidateType(e, ValCtx, /*bInner*/ true)) { - result = false; + if (!ValidateType(e, ValCtx, /*IsInner*/ true)) { + Result = false; } } - return result; + return Result; } if (Ty->isFloatTy() || Ty->isHalfTy() || Ty->isDoubleTy()) { return true; } if (Ty->isIntegerTy()) { - unsigned width = Ty->getIntegerBitWidth(); - if (width != 1 && width != 8 && width != 16 && width != 32 && width != 64) { + unsigned Width = Ty->getIntegerBitWidth(); + if (Width != 1 && Width != 8 && Width != 16 && Width != 32 && Width != 64) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesIntWidth); return false; } @@ -2207,13 +2207,13 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, - unsigned index, uint64_t *pValue) { - *pValue = 0; - if (pMD->getNumOperands() < index) { + unsigned Index, uint64_t *PValue) { + *PValue = 0; + if (pMD->getNumOperands() < Index) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - ConstantAsMetadata *C = dyn_cast(pMD->getOperand(index)); + ConstantAsMetadata *C = dyn_cast(pMD->getOperand(Index)); if (C == nullptr) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; @@ -2223,7 +2223,7 @@ static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, ValCtx.EmitMetaError(pMD, 
ValidationRule::MetaWellFormed); return false; } - *pValue = CI->getValue().getZExtValue(); + *PValue = CI->getValue().getZExtValue(); return true; } @@ -2237,14 +2237,14 @@ static bool IsPrecise(Instruction &I, ValidationContext &ValCtx) { return false; } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { return false; } - if (val == 1) { + if (Val == 1) { return true; } - if (val != 0) { + if (Val != 0) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } return false; @@ -2263,12 +2263,12 @@ static bool IsValueMinPrec(DxilModule &DxilMod, Value *V) { } static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *setMeshOutputCounts, - CallInst *getMeshPayload) { + CallInst *SetMeshOutputCounts, + CallInst *GetMeshPayload) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Mesh) + if (ShaderKind != DXIL::ShaderKind::Mesh) return; } else { return; @@ -2277,10 +2277,10 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, DominatorTreeAnalysis DTA; DominatorTree DT = DTA.run(*F); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - bool foundSetMeshOutputCountsInCurrentBB = false; - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + bool FoundSetMeshOutputCountsInCurrentBb = false; + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; // Calls to external functions. CallInst *CI = dyn_cast(&I); @@ -2296,22 +2296,22 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, continue; } - if (CI == setMeshOutputCounts) { - foundSetMeshOutputCountsInCurrentBB = true; + if (CI == SetMeshOutputCounts) { + FoundSetMeshOutputCountsInCurrentBb = true; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); - unsigned opcode = OpcodeConst->getLimitedValue(); - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; - - if (dxilOpcode == DXIL::OpCode::StoreVertexOutput || - dxilOpcode == DXIL::OpCode::StorePrimitiveOutput || - dxilOpcode == DXIL::OpCode::EmitIndices) { - if (setMeshOutputCounts == nullptr) { + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); + unsigned Opcode = OpcodeConst->getLimitedValue(); + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; + + if (DxilOpcode == DXIL::OpCode::StoreVertexOutput || + DxilOpcode == DXIL::OpCode::StorePrimitiveOutput || + DxilOpcode == DXIL::OpCode::EmitIndices) { + if (SetMeshOutputCounts == nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMissingSetMeshOutputCounts); - } else if (!foundSetMeshOutputCountsInCurrentBB && - !DT.dominates(setMeshOutputCounts->getParent(), + } else if (!FoundSetMeshOutputCountsInCurrentBb && + !DT.dominates(SetMeshOutputCounts->getParent(), I.getParent())) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNonDominatingSetMeshOutputCounts); @@ -2322,61 +2322,61 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, } } - if (getMeshPayload) { - PointerType *payloadPTy = cast(getMeshPayload->getType()); - StructType *payloadTy = - cast(payloadPTy->getPointerElementType()); + if (GetMeshPayload) { + PointerType *PayloadPTy = cast(GetMeshPayload->getType()); + StructType 
*PayloadTy = + cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.MS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.MS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.MS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes)}); } - if (prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSize, - {F->getName(), std::to_string(prop.ShaderProps.MS.payloadSizeInBytes), + {F->getName(), std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } } static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *dispatchMesh) { + CallInst *DispatchMesh) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Amplification) + if (ShaderKind != DXIL::ShaderKind::Amplification) return; - if (dispatchMesh) { - DxilInst_DispatchMesh dispatchMeshCall(dispatchMesh); - Value *operandVal = dispatchMeshCall.get_payload(); - Type *payloadTy = operandVal->getType(); + if (DispatchMesh) { + DxilInst_DispatchMesh DispatchMeshCall(DispatchMesh); + Value *OperandVal = DispatchMeshCall.get_payload(); + Type *PayloadTy = OperandVal->getType(); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.AS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.AS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitInstrFormatError( - dispatchMesh, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes)}); } - if (prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, {F->getName(), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } @@ -2385,7 +2385,7 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, return; } - if (dispatchMesh == nullptr) { + if (DispatchMesh == nullptr) { ValCtx.EmitFnError(F, ValidationRule::InstrNotOnceDispatchMesh); return; } @@ -2393,30 +2393,30 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext 
&ValCtx, PostDominatorTree PDT; PDT.runOnFunction(*F); - if (!PDT.dominates(dispatchMesh->getParent(), &F->getEntryBlock())) { - ValCtx.EmitInstrError(dispatchMesh, + if (!PDT.dominates(DispatchMesh->getParent(), &F->getEntryBlock())) { + ValCtx.EmitInstrError(DispatchMesh, ValidationRule::InstrNonDominatingDispatchMesh); } - Function *dispatchMeshFunc = dispatchMesh->getCalledFunction(); - FunctionType *dispatchMeshFuncTy = dispatchMeshFunc->getFunctionType(); - PointerType *payloadPTy = - cast(dispatchMeshFuncTy->getParamType(4)); - StructType *payloadTy = cast(payloadPTy->getPointerElementType()); + Function *DispatchMeshFunc = DispatchMesh->getCalledFunction(); + FunctionType *DispatchMeshFuncTy = DispatchMeshFunc->getFunctionType(); + PointerType *PayloadPTy = + cast(DispatchMeshFuncTy->getParamType(4)); + StructType *PayloadTy = cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - if (payloadSize > DXIL::kMaxMSASPayloadBytes) { + if (PayloadSize > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, - {F->getName(), std::to_string(payloadSize), + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + {F->getName(), std::to_string(PayloadSize), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } -static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { +static void ValidateControlFlowHint(BasicBlock &BB, ValidationContext &ValCtx) { // Validate controlflow hint. - TerminatorInst *TI = bb.getTerminator(); + TerminatorInst *TI = BB.getTerminator(); if (!TI) return; @@ -2427,33 +2427,33 @@ static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { if (pNode->getNumOperands() < 3) return; - bool bHasBranch = false; - bool bHasFlatten = false; - bool bForceCase = false; + bool HasBranch = false; + bool HasFlatten = false; + bool ForceCase = false; - for (unsigned i = 2; i < pNode->getNumOperands(); i++) { - uint64_t value = 0; - if (GetNodeOperandAsInt(ValCtx, pNode, i, &value)) { - DXIL::ControlFlowHint hint = static_cast(value); - switch (hint) { + for (unsigned I = 2; I < pNode->getNumOperands(); I++) { + uint64_t Value = 0; + if (GetNodeOperandAsInt(ValCtx, pNode, I, &Value)) { + DXIL::ControlFlowHint Hint = static_cast(Value); + switch (Hint) { case DXIL::ControlFlowHint::Flatten: - bHasFlatten = true; + HasFlatten = true; break; case DXIL::ControlFlowHint::Branch: - bHasBranch = true; + HasBranch = true; break; case DXIL::ControlFlowHint::ForceCase: - bForceCase = true; + ForceCase = true; break; default: ValCtx.EmitMetaError(pNode, ValidationRule::MetaInvalidControlFlowHint); } } } - if (bHasBranch && bHasFlatten) { + if (HasBranch && HasFlatten) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaBranchFlatten); } - if (bForceCase && !isa(TI)) { + if (ForceCase && !isa(TI)) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaForceCaseOnSwitch); } } @@ -2466,30 +2466,30 @@ static void ValidateTBAAMetadata(MDNode *Node, ValidationContext &ValCtx) { } } break; case 2: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } } break; case 3: { - MDNode *rootNode = 
dyn_cast<MDNode>(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast<MDNode>(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } - ConstantAsMetadata *pointsToConstMem = + ConstantAsMetadata *PointsToConstMem = dyn_cast<ConstantAsMetadata>(Node->getOperand(2)); - if (!pointsToConstMem) { + if (!PointsToConstMem) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ConstantInt *isConst = - dyn_cast<ConstantInt>(pointsToConstMem->getValue()); - if (!isConst) { + ConstantInt *IsConst = + dyn_cast<ConstantInt>(PointsToConstMem->getValue()); + if (!IsConst) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); - } else if (isConst->getValue().getLimitedValue() > 1) { + } else if (IsConst->getValue().getLimitedValue() > 1) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } } @@ -2570,11 +2570,11 @@ static void ValidateNonUniformMetadata(Instruction &I, MDNode *pMD, if (pMD->getNumOperands() != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - if (val != 1) { + if (Val != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } } @@ -2609,31 +2609,31 @@ static void ValidateInstructionMetadata(Instruction *I, } static void ValidateFunctionAttribute(Function *F, ValidationContext &ValCtx) { - AttributeSet attrSet = F->getAttributes().getFnAttributes(); + AttributeSet AttrSet = F->getAttributes().getFnAttributes(); // fp32-denorm-mode - if (attrSet.hasAttribute(AttributeSet::FunctionIndex, + if (AttrSet.hasAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString)) { - Attribute attr = attrSet.getAttribute(AttributeSet::FunctionIndex, + Attribute Attr = AttrSet.getAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString); - StringRef value = attr.getValueAsString(); - if (!value.equals(DXIL::kFP32DenormValueAnyString) && - !value.equals(DXIL::kFP32DenormValueFtzString) && - !value.equals(DXIL::kFP32DenormValuePreserveString)) { - ValCtx.EmitFnAttributeError(F, attr.getKindAsString(), - attr.getValueAsString()); + StringRef StrValue = Attr.getValueAsString(); + if (!StrValue.equals(DXIL::kFP32DenormValueAnyString) && - !StrValue.equals(DXIL::kFP32DenormValueFtzString) && - !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + !StrValue.equals(DXIL::kFP32DenormValueFtzString) && + !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + ValCtx.EmitFnAttributeError(F, Attr.getKindAsString(), + Attr.getValueAsString()); } } // TODO: If validating libraries, we should remove all unknown function // attributes.
For each attribute, check if it is a known attribute - for (unsigned I = 0, E = attrSet.getNumSlots(); I != E; ++I) { - for (auto AttrIter = attrSet.begin(I), AttrEnd = attrSet.end(I); + for (unsigned I = 0, E = AttrSet.getNumSlots(); I != E; ++I) { + for (auto AttrIter = AttrSet.begin(I), AttrEnd = AttrSet.end(I); AttrIter != AttrEnd; ++AttrIter) { if (!AttrIter->isStringAttribute()) { continue; } - StringRef kind = AttrIter->getKindAsString(); - if (!kind.equals(DXIL::kFP32DenormKindString) && - !kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { + StringRef Kind = AttrIter->getKindAsString(); + if (!Kind.equals(DXIL::kFP32DenormKindString) && + !Kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { ValCtx.EmitFnAttributeError(F, AttrIter->getKindAsString(), AttrIter->getValueAsString()); } @@ -2683,10 +2683,10 @@ static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, ValidationContext &ValCtx) { if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) return true; - unsigned OpCode = I.getOpcode(); - if (OpCode == Instruction::InsertElement || - OpCode == Instruction::ExtractElement || - OpCode == Instruction::ShuffleVector) + unsigned Opcode = I.getOpcode(); + if (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement || + Opcode == Instruction::ShuffleVector) return false; return true; @@ -2697,16 +2697,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; bool SupportsLifetimeIntrinsics = ValCtx.DxilMod.GetShaderModel()->IsSM66Plus(); - SmallVector gradientOps; - SmallVector barriers; - CallInst *setMeshOutputCounts = nullptr; - CallInst *getMeshPayload = nullptr; - CallInst *dispatchMesh = nullptr; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + SmallVector GradientOps; + SmallVector Barriers; + CallInst *SetMeshOutputCounts = nullptr; + CallInst *GetMeshPayload = nullptr; + CallInst *DispatchMesh = nullptr; + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; if (I.hasMetadata()) { @@ -2745,27 +2745,27 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); if (OpcodeConst == nullptr) { ValCtx.EmitInstrFormatError(&I, ValidationRule::InstrOpConst, {"Opcode", "DXIL operation"}); continue; } - unsigned opcode = OpcodeConst->getLimitedValue(); - if (opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { + unsigned Opcode = OpcodeConst->getLimitedValue(); + if (Opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpCode, {std::to_string((unsigned)DXIL::OpCode::NumOpCodes), - std::to_string(opcode)}); + std::to_string(Opcode)}); continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; bool IllegalOpFunc = true; - for (auto &it : hlslOP->GetOpFuncList(dxilOpcode)) { - if (it.second == FCalled) { + for (auto &It : HlslOP->GetOpFuncList(DxilOpcode)) { + if (It.second == FCalled) { IllegalOpFunc = false; break; } @@ -2774,46 +2774,46 @@ static 
void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { if (IllegalOpFunc) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpFunction, - {FCalled->getName(), OP::GetOpCodeName(dxilOpcode)}); + {FCalled->getName(), OP::GetOpCodeName(DxilOpcode)}); continue; } - if (OP::IsDxilOpGradient(dxilOpcode)) { - gradientOps.push_back(CI); + if (OP::IsDxilOpGradient(DxilOpcode)) { + GradientOps.push_back(CI); } - if (dxilOpcode == DXIL::OpCode::Barrier) { - barriers.push_back(CI); + if (DxilOpcode == DXIL::OpCode::Barrier) { + Barriers.push_back(CI); } // External function validation will check the parameter // list. This function will check that the call does not // violate any rules. - if (dxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { + if (DxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { // validate the call count of SetMeshOutputCounts - if (setMeshOutputCounts != nullptr) { + if (SetMeshOutputCounts != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleSetMeshOutputCounts); } - setMeshOutputCounts = CI; + SetMeshOutputCounts = CI; } - if (dxilOpcode == DXIL::OpCode::GetMeshPayload) { + if (DxilOpcode == DXIL::OpCode::GetMeshPayload) { // validate the call count of GetMeshPayload - if (getMeshPayload != nullptr) { + if (GetMeshPayload != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleGetMeshPayload); } - getMeshPayload = CI; + GetMeshPayload = CI; } - if (dxilOpcode == DXIL::OpCode::DispatchMesh) { + if (DxilOpcode == DXIL::OpCode::DispatchMesh) { // validate the call count of DispatchMesh - if (dispatchMesh != nullptr) { + if (DispatchMesh != nullptr) { ValCtx.EmitInstrError(&I, ValidationRule::InstrNotOnceDispatchMesh); } - dispatchMesh = CI; + DispatchMesh = CI; } } continue; @@ -2821,23 +2821,23 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { for (Value *op : I.operands()) { if (isa(op)) { - bool legalUndef = isa(&I); + bool LegalUndef = isa(&I); if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } if (isa(&I)) { - legalUndef = op == I.getOperand(1); + LegalUndef = op == I.getOperand(1); } if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } - if (!legalUndef) + if (!LegalUndef) ValCtx.EmitInstrError(&I, ValidationRule::InstrNoReadingUninitialized); } else if (ConstantExpr *CE = dyn_cast(op)) { - for (Value *opCE : CE->operands()) { - if (isa(opCE)) { + for (Value *OpCE : CE->operands()) { + if (isa(OpCE)) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNoReadingUninitialized); } @@ -2867,8 +2867,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } - unsigned opcode = I.getOpcode(); - switch (opcode) { + unsigned Opcode = I.getOpcode(); + switch (Opcode) { case Instruction::Alloca: { AllocaInst *AI = cast(&I); // TODO: validate address space and alignment @@ -2909,26 +2909,26 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } GetElementPtrInst *GEP = cast(&I); - bool allImmIndex = true; + bool AllImmIndex = true; for (auto Idx = GEP->idx_begin(), E = GEP->idx_end(); Idx != E; Idx++) { if (!isa(Idx)) { - allImmIndex = false; + AllImmIndex = false; break; } } - if (allImmIndex) { + if (AllImmIndex) { const DataLayout &DL = ValCtx.DL; Value *Ptr = GEP->getPointerOperand(); - unsigned size = + unsigned Size = DL.getTypeAllocSize(Ptr->getType()->getPointerElementType()); - unsigned valSize = + unsigned ValSize = 
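
The SetMeshOutputCounts, GetMeshPayload and DispatchMesh branches above all apply the same "call at most once per entry" rule. A tiny sketch of that bookkeeping, with CallSite as a hypothetical stand-in for the CallInst pointer the validator remembers:

#include <string>
#include <vector>

struct CallSite { std::string Callee; };

// Remembers the first occurrence of a once-only DXIL op and records a
// diagnostic for every later occurrence, mirroring the duplicate checks above.
struct OncePerEntryTracker {
  const CallSite *First = nullptr;
  std::vector<std::string> Errors;

  void Note(const CallSite &CS, const char *RuleName) {
    if (First)
      Errors.push_back(RuleName); // second call of a once-only op is an error
    else
      First = &CS;                // the first (and only legal) call
  }
};
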
DL.getTypeAllocSize(GEP->getType()->getPointerElementType()); SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - unsigned offset = + unsigned Offset = DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); - if ((offset + valSize) > size) { + if ((Offset + ValSize) > Size) { ValCtx.EmitInstrError(GEP, ValidationRule::InstrInBoundsAccess); } } @@ -3002,16 +3002,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: { Value *Ptr = I.getOperand(AtomicRMWInst::getPointerOperandIndex()); - PointerType *ptrType = cast(Ptr->getType()); - Type *elType = ptrType->getElementType(); + PointerType *PtrType = cast(Ptr->getType()); + Type *ElType = PtrType->getElementType(); const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - if ((elType->isIntegerTy(64)) && !pSM->IsSM66Plus()) + if ((ElType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( &I, ValidationRule::SmOpcodeInInvalidFunction, {"64-bit atomic operations", "Shader Model 6.6+"}); - if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace && - ptrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) + if (PtrType->getAddressSpace() != DXIL::kTGSMAddrSpace && + PtrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) ValCtx.EmitInstrError( &I, ValidationRule::InstrAtomicOpNonGroupsharedOrRecord); @@ -3062,12 +3062,12 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } } - ValidateControlFlowHint(*b, ValCtx); + ValidateControlFlowHint(*B, ValCtx); } - ValidateMsIntrinsics(F, ValCtx, setMeshOutputCounts, getMeshPayload); + ValidateMsIntrinsics(F, ValCtx, SetMeshOutputCounts, GetMeshPayload); - ValidateAsIntrinsics(F, ValCtx, dispatchMesh); + ValidateAsIntrinsics(F, ValCtx, DispatchMesh); } static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { @@ -3075,39 +3075,39 @@ static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { // to do here if (!ValCtx.DxilMod.HasDxilFunctionProps(F)) return; - auto &props = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (!props.IsNode()) + auto &Props = ValCtx.DxilMod.GetDxilFunctionProps(F); + if (!Props.IsNode()) return; - if (props.InputNodes.size() > 1) { + if (Props.InputNodes.size() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::DeclMultipleNodeInputs, - {F->getName(), std::to_string(props.InputNodes.size())}); + {F->getName(), std::to_string(Props.InputNodes.size())}); } - for (auto &input : props.InputNodes) { - if (!input.Flags.RecordTypeMatchesLaunchType(props.Node.LaunchType)) { + for (auto &input : Props.InputNodes) { + if (!input.Flags.RecordTypeMatchesLaunchType(Props.Node.LaunchType)) { // We allow EmptyNodeInput here, as that may have been added implicitly // if there was no input specified if (input.Flags.IsEmptyInput()) continue; - llvm::StringRef validInputs = ""; - switch (props.Node.LaunchType) { + llvm::StringRef ValidInputs = ""; + switch (Props.Node.LaunchType) { case DXIL::NodeLaunchType::Broadcasting: - validInputs = "{RW}DispatchNodeInputRecord"; + ValidInputs = "{RW}DispatchNodeInputRecord"; break; case DXIL::NodeLaunchType::Coalescing: - validInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; + ValidInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; break; case DXIL::NodeLaunchType::Thread: - validInputs = "{RW}ThreadNodeInputRecord"; + ValidInputs = "{RW}ThreadNodeInputRecord"; break; default: llvm_unreachable("invalid launch type"); } ValCtx.EmitFnFormatError( F, 
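
The constant-index GEP check above reduces to simple byte arithmetic: the indexed offset plus the size of the accessed element must stay inside the pointed-to allocation. A minimal sketch of that rule, written to avoid overflow rather than as the literal `Offset + ValSize > Size` comparison; all sizes are assumed to come from the module's DataLayout:

#include <cstdint>

// True when an access of AccessSizeBytes at ByteOffset stays inside an
// allocation of AllocSizeBytes, matching the InstrInBoundsAccess rule above.
bool IsConstantGEPInBounds(uint64_t AllocSizeBytes, uint64_t AccessSizeBytes,
                           uint64_t ByteOffset) {
  return ByteOffset <= AllocSizeBytes &&
         AccessSizeBytes <= AllocSizeBytes - ByteOffset;
}
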
ValidationRule::DeclNodeLaunchInputType, - {ShaderModel::GetNodeLaunchTypeName(props.Node.LaunchType), - F->getName(), validInputs}); + {ShaderModel::GetNodeLaunchTypeName(Props.Node.LaunchType), + F->getName(), ValidInputs}); } } } @@ -3118,26 +3118,26 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (F.isIntrinsic() || IsDxilFunction(&F)) return; } else { - DXIL::ShaderKind shaderKind = DXIL::ShaderKind::Library; - bool isShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); - unsigned numUDTShaderArgs = 0; - if (isShader) { - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; - switch (shaderKind) { + DXIL::ShaderKind ShaderKind = DXIL::ShaderKind::Library; + bool IsShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); + unsigned NumUDTShaderArgs = 0; + if (IsShader) { + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; + switch (ShaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - numUDTShaderArgs = 2; + NumUDTShaderArgs = 2; break; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - numUDTShaderArgs = 1; + NumUDTShaderArgs = 1; break; case DXIL::ShaderKind::Compute: { DxilModule &DM = ValCtx.DxilMod; if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); // Check that compute has no node metadata - if (entryProps.props.IsNode()) { + if (EntryProps.props.IsNode()) { ValCtx.EmitFnFormatError(&F, ValidationRule::MetaComputeWithNode, {F.getName()}); } @@ -3148,45 +3148,45 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { break; } } else { - isShader = ValCtx.DxilMod.IsPatchConstantShader(&F); + IsShader = ValCtx.DxilMod.IsPatchConstantShader(&F); } // Entry function should not have parameter. - if (isShader && 0 == numUDTShaderArgs && !F.arg_empty()) + if (IsShader && 0 == NumUDTShaderArgs && !F.arg_empty()) ValCtx.EmitFnFormatError(&F, ValidationRule::FlowFunctionCall, {F.getName()}); // Shader functions should return void. 
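
The launch-type switch that feeds the DeclNodeLaunchInputType diagnostic above maps each node launch type to the family of input record types it accepts. A hypothetical mirror of that table as a standalone helper:

#include <string>

enum class NodeLaunchType { Broadcasting, Coalescing, Thread };

// Returns the human-readable set of legal input record types for a launch
// type, as reported in the diagnostic above.
std::string ValidInputsFor(NodeLaunchType LT) {
  switch (LT) {
  case NodeLaunchType::Broadcasting:
    return "{RW}DispatchNodeInputRecord";
  case NodeLaunchType::Coalescing:
    return "{RW}GroupNodeInputRecords or EmptyNodeInput";
  case NodeLaunchType::Thread:
    return "{RW}ThreadNodeInputRecord";
  }
  return "";
}
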
- if (isShader && !F.getReturnType()->isVoidTy()) + if (IsShader && !F.getReturnType()->isVoidTy()) ValCtx.EmitFnFormatError(&F, ValidationRule::DeclShaderReturnVoid, {F.getName()}); - auto ArgFormatError = [&](Function &F, Argument &arg, ValidationRule rule) { - if (arg.hasName()) - ValCtx.EmitFnFormatError(&F, rule, {arg.getName().str(), F.getName()}); + auto ArgFormatError = [&](Function &F, Argument &Arg, ValidationRule Rule) { + if (Arg.hasName()) + ValCtx.EmitFnFormatError(&F, Rule, {Arg.getName().str(), F.getName()}); else - ValCtx.EmitFnFormatError(&F, rule, - {std::to_string(arg.getArgNo()), F.getName()}); + ValCtx.EmitFnFormatError(&F, Rule, + {std::to_string(Arg.getArgNo()), F.getName()}); }; - unsigned numArgs = 0; - for (auto &arg : F.args()) { - Type *argTy = arg.getType(); - if (argTy->isPointerTy()) - argTy = argTy->getPointerElementType(); - - numArgs++; - if (numUDTShaderArgs) { - if (arg.getArgNo() >= numUDTShaderArgs) { - ArgFormatError(F, arg, ValidationRule::DeclExtraArgs); - } else if (!argTy->isStructTy()) { - switch (shaderKind) { + unsigned NumArgs = 0; + for (auto &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + if (ArgTy->isPointerTy()) + ArgTy = ArgTy->getPointerElementType(); + + NumArgs++; + if (NumUDTShaderArgs) { + if (Arg.getArgNo() >= NumUDTShaderArgs) { + ArgFormatError(F, Arg, ValidationRule::DeclExtraArgs); + } else if (!ArgTy->isStructTy()) { + switch (ShaderKind) { case DXIL::ShaderKind::Callable: - ArgFormatError(F, arg, ValidationRule::DeclParamStruct); + ArgFormatError(F, Arg, ValidationRule::DeclParamStruct); break; default: - ArgFormatError(F, arg, - arg.getArgNo() == 0 + ArgFormatError(F, Arg, + Arg.getArgNo() == 0 ? ValidationRule::DeclPayloadStruct : ValidationRule::DeclAttrStruct); } @@ -3194,24 +3194,24 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { continue; } - while (argTy->isArrayTy()) { - argTy = argTy->getArrayElementType(); + while (ArgTy->isArrayTy()) { + ArgTy = ArgTy->getArrayElementType(); } - if (argTy->isStructTy() && !ValCtx.isLibProfile) { - ArgFormatError(F, arg, ValidationRule::DeclFnFlattenParam); + if (ArgTy->isStructTy() && !ValCtx.isLibProfile) { + ArgFormatError(F, Arg, ValidationRule::DeclFnFlattenParam); break; } } - if (numArgs < numUDTShaderArgs && shaderKind != DXIL::ShaderKind::Node) { - StringRef argType[2] = { - shaderKind == DXIL::ShaderKind::Callable ? "params" : "payload", + if (NumArgs < NumUDTShaderArgs && ShaderKind != DXIL::ShaderKind::Node) { + StringRef ArgType[2] = { + ShaderKind == DXIL::ShaderKind::Callable ? 
"params" : "payload", "attributes"}; - for (unsigned i = numArgs; i < numUDTShaderArgs; i++) { + for (unsigned I = NumArgs; I < NumUDTShaderArgs; I++) { ValCtx.EmitFnFormatError( &F, ValidationRule::DeclShaderMissingArg, - {ShaderModel::GetKindName(shaderKind), F.getName(), argType[i]}); + {ShaderModel::GetKindName(ShaderKind), F.getName(), ArgType[I]}); } } @@ -3248,25 +3248,25 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { static void ValidateGlobalVariable(GlobalVariable &GV, ValidationContext &ValCtx) { - bool isInternalGV = + bool IsInternalGv = dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV); if (ValCtx.isLibProfile) { - auto isCBufferGlobal = + auto IsCBufferGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isResourceGlobal = + auto IsResourceGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isSamplerGlobal = + auto IsSamplerGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) @@ -3274,32 +3274,32 @@ static void ValidateGlobalVariable(GlobalVariable &GV, return false; }; - bool isRes = isCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetUAVs()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetSRVs()); - isRes |= isSamplerGlobal(ValCtx.DxilMod.GetSamplers()); - isInternalGV |= isRes; + bool IsRes = IsCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetUAVs()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetSRVs()); + IsRes |= IsSamplerGlobal(ValCtx.DxilMod.GetSamplers()); + IsInternalGv |= IsRes; // Allow special dx.ishelper for library target if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0) { Type *Ty = GV.getType()->getPointerElementType(); if (Ty->isIntegerTy() && Ty->getScalarSizeInBits() == 32) { - isInternalGV = true; + IsInternalGv = true; } } } - if (!isInternalGV) { + if (!IsInternalGv) { if (!GV.user_empty()) { - bool hasInstructionUser = false; + bool HasInstructionUser = false; for (User *U : GV.users()) { if (isa(U)) { - hasInstructionUser = true; + HasInstructionUser = true; break; } } // External GV should not have instruction user. 
- if (hasInstructionUser) { + if (HasInstructionUser) { ValCtx.EmitGlobalVariableFormatError( &GV, ValidationRule::DeclNotUsedExternal, {GV.getName()}); } @@ -3322,14 +3322,14 @@ static void ValidateGlobalVariable(GlobalVariable &GV, } static void CollectFixAddressAccess(Value *V, - std::vector &fixAddrTGSMList) { + std::vector &FixAddrTGSMList) { for (User *U : V->users()) { if (GEPOperator *GEP = dyn_cast(U)) { if (isa(GEP) || GEP->hasAllConstantIndices()) { - CollectFixAddressAccess(GEP, fixAddrTGSMList); + CollectFixAddressAccess(GEP, FixAddrTGSMList); } } else if (StoreInst *SI = dyn_cast(U)) { - fixAddrTGSMList.emplace_back(SI); + FixAddrTGSMList.emplace_back(SI); } } } @@ -3339,16 +3339,16 @@ static bool IsDivergent(Value *V) { return false; } -static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, +static void ValidateTGSMRaceCondition(std::vector &FixAddrTGSMList, ValidationContext &ValCtx) { - std::unordered_set fixAddrTGSMFuncSet; - for (StoreInst *I : fixAddrTGSMList) { + std::unordered_set FixAddrTGSMFuncSet; + for (StoreInst *I : FixAddrTGSMList) { BasicBlock *BB = I->getParent(); - fixAddrTGSMFuncSet.insert(BB->getParent()); + FixAddrTGSMFuncSet.insert(BB->getParent()); } for (auto &F : ValCtx.DxilMod.GetModule()->functions()) { - if (F.isDeclaration() || !fixAddrTGSMFuncSet.count(&F)) + if (F.isDeclaration() || !FixAddrTGSMFuncSet.count(&F)) continue; PostDominatorTree PDT; @@ -3356,7 +3356,7 @@ static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, BasicBlock *Entry = &F.getEntryBlock(); - for (StoreInst *SI : fixAddrTGSMList) { + for (StoreInst *SI : FixAddrTGSMList) { BasicBlock *BB = SI->getParent(); if (BB->getParent() == &F) { if (PDT.dominates(BB, Entry)) { @@ -3375,7 +3375,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib(); unsigned TGSMSize = 0; - std::vector fixAddrTGSMList; + std::vector FixAddrTGSMList; const DataLayout &DL = M.GetModule()->getDataLayout(); for (GlobalVariable &GV : M.GetModule()->globals()) { ValidateGlobalVariable(GV, ValCtx); @@ -3390,9 +3390,9 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { if (Instruction *I = dyn_cast(U)) { llvm::Function *F = I->getParent()->getParent(); if (M.HasDxilEntryProps(F)) { - DxilFunctionProps &props = M.GetDxilEntryProps(F).props; - if (!props.IsCS() && !props.IsAS() && !props.IsMS() && - !props.IsNode()) { + DxilFunctionProps &Props = M.GetDxilEntryProps(F).props; + if (!Props.IsCS() && !Props.IsAS() && !Props.IsMS() && + !Props.IsNode()) { ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported, {"from non-compute entry points"}); @@ -3402,7 +3402,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { } } TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType()); - CollectFixAddressAccess(&GV, fixAddrTGSMList); + CollectFixAddressAccess(&GV, FixAddrTGSMList); } } @@ -3426,8 +3426,8 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { GV, Rule, {std::to_string(TGSMSize), std::to_string(MaxSize)}); } - if (!fixAddrTGSMList.empty()) { - ValidateTGSMRaceCondition(fixAddrTGSMList, ValCtx); + if (!FixAddrTGSMList.empty()) { + ValidateTGSMRaceCondition(FixAddrTGSMList, ValCtx); } } @@ -3440,20 +3440,20 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - 
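
The TGSM race check above collects stores through all-constant-index GEPs into groupshared memory and flags those whose basic block post-dominates the function entry, since every thread would then write the same fixed address. A conceptual sketch of that final filtering step, with BlockId and PostDominatesEntry as hypothetical stand-ins for the basic block and the PostDominatorTree query:

#include <vector>

struct FixedAddressStore { int BlockId; };

// Returns the blocks whose fixed-address groupshared stores are unconditionally
// executed by every thread, i.e. the candidates for the race-condition rule.
std::vector<int>
FindRacyStores(const std::vector<FixedAddressStore> &Stores,
               bool (*PostDominatesEntry)(int BlockId)) {
  std::vector<int> Racy;
  for (const FixedAddressStore &S : Stores)
    if (PostDominatesEntry(S.BlockId))
      Racy.push_back(S.BlockId);
  return Racy;
}
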
uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { - unsigned curMajor, curMinor; - GetValidationVersion(&curMajor, &curMinor); + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { + unsigned CurMajor, CurMinor; + GetValidationVersion(&CurMajor, &CurMinor); // This will need to be updated as major/minor versions evolve, // depending on the degree of compat across versions. - if (majorVer == curMajor && minorVer <= curMinor) { + if (MajorVer == CurMajor && MinorVer <= CurMinor) { return; } else { ValCtx.EmitFormatError( ValidationRule::MetaVersionSupported, - {"Validator", std::to_string(majorVer), std::to_string(minorVer), - std::to_string(curMajor), std::to_string(curMinor)}); + {"Validator", std::to_string(MajorVer), std::to_string(MinorVer), + std::to_string(CurMajor), std::to_string(CurMinor)}); return; } } @@ -3471,19 +3471,19 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { // This will need to be updated as dxil major/minor versions evolve, // depending on the degree of compat across versions. - if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) && - (majorVer == ValCtx.m_DxilMajor && - minorVer == ValCtx.m_DxilMinor)) { + if ((MajorVer == DXIL::kDxilMajor && MinorVer <= DXIL::kDxilMinor) && + (MajorVer == ValCtx.m_DxilMajor && + MinorVer == ValCtx.m_DxilMinor)) { return; } else { ValCtx.EmitFormatError(ValidationRule::MetaVersionSupported, - {"Dxil", std::to_string(majorVer), - std::to_string(minorVer), + {"Dxil", std::to_string(MajorVer), + std::to_string(MinorVer), std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)}); return; @@ -3501,16 +3501,16 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { NamedMDNode *TA = pModule->getNamedMetadata("dx.typeAnnotations"); if (TA == nullptr) return; - for (unsigned i = 0, end = TA->getNumOperands(); i < end; ++i) { - MDTuple *TANode = dyn_cast(TA->getOperand(i)); + for (unsigned I = 0, End = TA->getNumOperands(); I < End; ++I) { + MDTuple *TANode = dyn_cast(TA->getOperand(I)); if (TANode->getNumOperands() < 3) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } - ConstantInt *tag = mdconst::extract(TANode->getOperand(0)); - uint64_t tagValue = tag->getZExtValue(); - if (tagValue != DxilMDHelper::kDxilTypeSystemStructTag && - tagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { + ConstantInt *Tag = mdconst::extract(TANode->getOperand(0)); + uint64_t TagValue = Tag->getZExtValue(); + if (TagValue != DxilMDHelper::kDxilTypeSystemStructTag && + TagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } @@ -3519,11 +3519,11 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { } static void ValidateBitcode(ValidationContext &ValCtx) { - std::string diagStr; - raw_string_ostream diagStream(diagStr); - if 
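
The validator-version compatibility rule above is simply "same major, minor no newer than the running validator". A one-line sketch of that predicate (the DXIL-version hunk that follows adds the extra requirement that the recorded version also matches the module's own m_DxilMajor/m_DxilMinor):

// Accepts the dx.valver metadata when the recorded version is compatible with
// the running validator, as in ValidateValidatorVersion above.
bool IsValidatorVersionSupported(unsigned MajorVer, unsigned MinorVer,
                                 unsigned CurMajor, unsigned CurMinor) {
  return MajorVer == CurMajor && MinorVer <= CurMinor;
}
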
(llvm::verifyModule(ValCtx.M, &diagStream)) { + std::string DiagStr; + raw_string_ostream DiagStream(DiagStr); + if (llvm::verifyModule(ValCtx.M, &DiagStream)) { ValCtx.EmitError(ValidationRule::BitcodeValid); - dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), diagStream.str()); + dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), DiagStream.str()); } } @@ -3537,18 +3537,18 @@ static void ValidateWaveSize(ValidationContext &ValCtx, if (!EPs) return; - for (unsigned i = 0, end = EPs->getNumOperands(); i < end; ++i) { - MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(i)); + for (unsigned I = 0, End = EPs->getNumOperands(); I < End; ++I) { + MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(I)); if (EPNodeRef->getNumOperands() < 5) { ValCtx.EmitMetaError(EPNodeRef, ValidationRule::MetaWellFormed); return; } // get access to the digit that represents the metadata number that // would store entry properties - const llvm::MDOperand &mOp = + const llvm::MDOperand &MOp = EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1); // the final operand to the entry points tuple should be a tuple. - if (mOp == nullptr || (mOp.get())->getMetadataID() != Metadata::MDTupleKind) + if (MOp == nullptr || (MOp.get())->getMetadataID() != Metadata::MDTupleKind) continue; // get access to the node that stores entry properties @@ -3556,29 +3556,29 @@ static void ValidateWaveSize(ValidationContext &ValCtx, EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1)); // find any incompatible tags inside the entry properties // increment j by 2 to only analyze tags, not values - bool foundTag = false; - for (unsigned j = 0, end2 = EPropNode->getNumOperands(); j < end2; j += 2) { - const MDOperand &propertyTagOp = EPropNode->getOperand(j); + bool FoundTag = false; + for (unsigned J = 0, End2 = EPropNode->getNumOperands(); J < End2; J += 2) { + const MDOperand &PropertyTagOp = EPropNode->getOperand(J); // note, we are only looking for tags, which will be a constant // integer - DXASSERT(!(propertyTagOp == nullptr || - (propertyTagOp.get())->getMetadataID() != + DXASSERT(!(PropertyTagOp == nullptr || + (PropertyTagOp.get())->getMetadataID() != Metadata::ConstantAsMetadataKind), "tag operand should be a constant integer."); - ConstantInt *tag = mdconst::extract(propertyTagOp); - uint64_t tagValue = tag->getZExtValue(); + ConstantInt *Tag = mdconst::extract(PropertyTagOp); + uint64_t TagValue = Tag->getZExtValue(); // legacy wavesize is only supported between 6.6 and 6.7, so we // should fail if we find the ranged wave size metadata tag - if (tagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { + if (TagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; + FoundTag = true; if (SM->IsSM66Plus() && !SM->IsSM68Plus()) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeRangeNeedsSM68Plus, @@ -3587,36 +3587,36 @@ static void ValidateWaveSize(ValidationContext &ValCtx, } // get the metadata that contains the // parameters to the wavesize attribute - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 3) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeRangeExpectsThreeParams, {}); return; } - for (int k = 0; k < 3; k++) { - const MDOperand ¶m = WaveTuple->getOperand(k); - if (param->getMetadataID() != 
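
The wave-size hunks in this region enforce a small set of shape rules: at most one wave-size tag per entry point, one constant operand for the legacy form, three for the ranged form, and the ranged form only on profiles that support it. A simplified sketch of those rules; the RangedFormAllowed flag stands in for the SM 6.8+ query, and the real check also verifies that every operand is a constant:

#include <cstddef>

// Returns an error message, or nullptr if the tag is acceptable.
const char *CheckWaveSizeTag(bool AlreadySeenTag, bool IsRangedForm,
                             size_t NumOperands, bool RangedFormAllowed) {
  if (AlreadySeenTag)
    return "duplicate wave size tag on entry point";
  if (IsRangedForm) {
    if (!RangedFormAllowed)  // ranged wave size needs shader model 6.8+
      return "ranged wave size requires shader model 6.8+";
    if (NumOperands != 3)    // assumed to be (min, max, preferred)
      return "ranged wave size expects three parameters";
  } else if (NumOperands != 1) {
    return "wave size expects one parameter";
  }
  return nullptr;
}
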
Metadata::ConstantAsMetadataKind) { + for (int K = 0; K < 3; K++) { + const MDOperand &Param = WaveTuple->getOperand(K); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; } } - } else if (tagValue == DxilMDHelper::kDxilWaveSizeTag) { + } else if (TagValue == DxilMDHelper::kDxilWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + FoundTag = true; + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 1) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeExpectsOneParam, {}); return; } - const MDOperand ¶m = WaveTuple->getOperand(0); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + const MDOperand &Param = WaveTuple->getOperand(0); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; @@ -3637,9 +3637,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { ValidateDxilVersion(ValCtx); Module *pModule = &ValCtx.M; - const std::string &target = pModule->getTargetTriple(); - if (target != "dxil-ms-dx") { - ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target}); + const std::string &Target = pModule->getTargetTriple(); + if (Target != "dxil-ms-dx") { + ValCtx.EmitFormatError(ValidationRule::MetaTarget, {Target}); } // The llvm.dbg.(cu/contents/defines/mainFileName/arg) named metadata nodes @@ -3647,9 +3647,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { // llvm.bitsets is also disallowed. // // These are verified in lib/IR/Verifier.cpp. - StringMap llvmNamedMeta; - llvmNamedMeta["llvm.ident"]; - llvmNamedMeta["llvm.module.flags"]; + StringMap LlvmNamedMeta; + LlvmNamedMeta["llvm.ident"]; + LlvmNamedMeta["llvm.module.flags"]; for (auto &NamedMetaNode : pModule->named_metadata()) { if (!DxilModule::IsKnownNamedMetaData(NamedMetaNode)) { @@ -3657,7 +3657,7 @@ static void ValidateMetadata(ValidationContext &ValCtx) { if (!name.startswith_lower("llvm.")) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } else { - if (llvmNamedMeta.count(name) == 0) { + if (LlvmNamedMeta.count(name) == 0) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } } @@ -3690,35 +3690,35 @@ static void ValidateMetadata(ValidationContext &ValCtx) { } static void ValidateResourceOverlap( - hlsl::DxilResourceBase &res, - SpacesAllocator &spaceAllocator, + hlsl::DxilResourceBase &Res, + SpacesAllocator &SpaceAllocator, ValidationContext &ValCtx) { - unsigned base = res.GetLowerBound(); - if (ValCtx.isLibProfile && !res.IsAllocated()) { + unsigned Base = Res.GetLowerBound(); + if (ValCtx.isLibProfile && !Res.IsAllocated()) { // Skip unallocated resource for library. 
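
The module-metadata hunk above enforces two simple policies: the target triple must be the DXIL triple, and the only tolerated llvm.* named metadata nodes are llvm.ident and llvm.module.flags (non-llvm.* names must be metadata the DXIL module itself recognizes). A minimal sketch of both checks:

#include <set>
#include <string>

bool IsAllowedLlvmNamedMetadata(const std::string &Name) {
  static const std::set<std::string> Allowed = {"llvm.ident",
                                                "llvm.module.flags"};
  return Allowed.count(Name) != 0; // anything else llvm.* is MetaKnown error
}

bool IsDxilTargetTriple(const std::string &Triple) {
  return Triple == "dxil-ms-dx";   // MetaTarget rule above
}
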
return; } - unsigned size = res.GetRangeSize(); - unsigned space = res.GetSpaceID(); + unsigned Size = Res.GetRangeSize(); + unsigned Space = Res.GetSpaceID(); - auto &allocator = spaceAllocator.Get(space); - unsigned end = base + size - 1; + auto &Allocator = SpaceAllocator.Get(Space); + unsigned End = Base + Size - 1; // unbounded - if (end < base) - end = size; - const DxilResourceBase *conflictRes = allocator.Insert(&res, base, end); - if (conflictRes) { + if (End < Base) + End = Size; + const DxilResourceBase *ConflictRes = Allocator.Insert(&Res, Base, End); + if (ConflictRes) { ValCtx.EmitFormatError( ValidationRule::SmResourceRangeOverlap, - {ValCtx.GetResourceName(&res), std::to_string(base), - std::to_string(size), std::to_string(conflictRes->GetLowerBound()), - std::to_string(conflictRes->GetRangeSize()), std::to_string(space)}); + {ValCtx.GetResourceName(&Res), std::to_string(Base), + std::to_string(Size), std::to_string(ConflictRes->GetLowerBound()), + std::to_string(ConflictRes->GetRangeSize()), std::to_string(Space)}); } } -static void ValidateResource(hlsl::DxilResource &res, +static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { - switch (res.GetKind()) { + switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: @@ -3730,8 +3730,8 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ResourceKind::Texture3D: case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - if (res.GetSampleCount() > 0) { - ValCtx.EmitResourceError(&res, ValidationRule::SmSampleCountOnlyOn2DMS); + if (Res.GetSampleCount() > 0) { + ValCtx.EmitResourceError(&Res, ValidationRule::SmSampleCountOnlyOn2DMS); } break; case DXIL::ResourceKind::Texture2DMS: @@ -3742,16 +3742,16 @@ static void ValidateResource(hlsl::DxilResource &res, break; case DXIL::ResourceKind::FeedbackTexture2D: case DXIL::ResourceKind::FeedbackTexture2DArray: - if (res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) - ValCtx.EmitResourceError(&res, + if (Res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidSamplerFeedbackType); break; default: - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceKind); + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceKind); break; } - switch (res.GetCompType().GetKind()) { + switch (Res.GetCompType().GetKind()) { case DXIL::ComponentType::F32: case DXIL::ComponentType::SNormF32: case DXIL::ComponentType::UNormF32: @@ -3765,266 +3765,266 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ComponentType::U16: break; default: - if (!res.IsStructuredBuffer() && !res.IsRawBuffer() && - !res.IsFeedbackTexture()) - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceCompType); + if (!Res.IsStructuredBuffer() && !Res.IsRawBuffer() && + !Res.IsFeedbackTexture()) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceCompType); break; } - if (res.IsStructuredBuffer()) { - unsigned stride = res.GetElementStride(); - bool alignedTo4Bytes = (stride & 3) == 0; - if (!alignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { + if (Res.IsStructuredBuffer()) { + unsigned Stride = Res.GetElementStride(); + bool AlignedTo4Bytes = (Stride & 3) == 0; + if (!AlignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignment, - 
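
ValidateResourceOverlap above places each resource's register range [LowerBound, LowerBound + RangeSize - 1] into a per-space allocator and reports the first conflict. A simplified model of that bookkeeping using an ordered map keyed by range start; unbounded ranges (the End < Base case above) are left out of this sketch:

#include <iterator>
#include <map>
#include <string>

struct Range { unsigned Start, End; std::string Name; };

class SpaceRanges {
  std::map<unsigned, Range> ByStart; // existing, non-overlapping ranges
public:
  // Returns the conflicting resource name, or an empty string if placed.
  std::string Insert(const Range &R) {
    auto Next = ByStart.lower_bound(R.Start);
    if (Next != ByStart.end() && Next->second.Start <= R.End)
      return Next->second.Name;       // overlaps the following range
    if (Next != ByStart.begin()) {
      auto Prev = std::prev(Next);
      if (Prev->second.End >= R.Start)
        return Prev->second.Name;     // overlaps the preceding range
    }
    ByStart.emplace(R.Start, R);
    return {};
  }
};
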
{std::to_string(4), std::to_string(stride)}); + &Res, ValidationRule::MetaStructBufAlignment, + {std::to_string(4), std::to_string(Stride)}); } - if (stride > DXIL::kMaxStructBufferStride) { + if (Stride > DXIL::kMaxStructBufferStride) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignmentOutOfBound, + &Res, ValidationRule::MetaStructBufAlignmentOutOfBound, {std::to_string(DXIL::kMaxStructBufferStride), - std::to_string(stride)}); + std::to_string(Stride)}); } } - if (res.IsAnyTexture() || res.IsTypedBuffer()) { - Type *RetTy = res.GetRetType(); - unsigned size = + if (Res.IsAnyTexture() || Res.IsTypedBuffer()) { + Type *RetTy = Res.GetRetType(); + unsigned Size = ValCtx.DxilMod.GetModule()->getDataLayout().getTypeAllocSize(RetTy); - if (size > 4 * 4) { - ValCtx.EmitResourceError(&res, ValidationRule::MetaTextureType); + if (Size > 4 * 4) { + ValCtx.EmitResourceError(&Res, ValidationRule::MetaTextureType); } } } static void CollectCBufferRanges( - DxilStructAnnotation *annotation, - SpanAllocator &constAllocator, unsigned base, - DxilTypeSystem &typeSys, StringRef cbName, ValidationContext &ValCtx) { - DXASSERT(((base + 15) & ~(0xf)) == base, + DxilStructAnnotation *Annotation, + SpanAllocator &ConstAllocator, unsigned Base, + DxilTypeSystem &TypeSys, StringRef CbName, ValidationContext &ValCtx) { + DXASSERT(((Base + 15) & ~(0xf)) == Base, "otherwise, base for struct is not aligned"); - unsigned cbSize = annotation->GetCBufferSize(); + unsigned CbSize = Annotation->GetCBufferSize(); - const StructType *ST = annotation->GetStructType(); + const StructType *ST = Annotation->GetStructType(); - for (int i = annotation->GetNumFields() - 1; i >= 0; i--) { - DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(i); - Type *EltTy = ST->getElementType(i); + for (int I = Annotation->GetNumFields() - 1; I >= 0; I--) { + DxilFieldAnnotation &FieldAnnotation = Annotation->GetFieldAnnotation(I); + Type *EltTy = ST->getElementType(I); - unsigned offset = fieldAnnotation.GetCBufferOffset(); + unsigned Offset = FieldAnnotation.GetCBufferOffset(); unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( - fieldAnnotation, EltTy, typeSys); + FieldAnnotation, EltTy, TypeSys); - bool bOutOfBound = false; + bool IsOutOfBound = false; if (!EltTy->isAggregateType()) { - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { - if (constAllocator.Insert(&fieldAnnotation, base + offset, - base + offset + EltSize - 1)) { + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { + if (ConstAllocator.Insert(&FieldAnnotation, Base + Offset, + Base + Offset + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } else if (isa(EltTy)) { - if (((offset + 15) & ~(0xf)) != offset) { + if (((Offset + 15) & ~(0xf)) != Offset) { ValCtx.EmitFormatError(ValidationRule::SmCBufferArrayOffsetAlignment, - {cbName, std::to_string(offset)}); + {CbName, std::to_string(Offset)}); continue; } - unsigned arrayCount = 1; + unsigned ArrayCount = 1; while (isa(EltTy)) { - arrayCount *= EltTy->getArrayNumElements(); + ArrayCount *= EltTy->getArrayNumElements(); EltTy = EltTy->getArrayElementType(); } DxilStructAnnotation *EltAnnotation = nullptr; if (StructType *EltST = dyn_cast(EltTy)) - EltAnnotation = typeSys.GetStructAnnotation(EltST); + EltAnnotation = TypeSys.GetStructAnnotation(EltST); - unsigned alignedEltSize = ((EltSize + 15) & ~(0xf)); - unsigned 
arraySize = ((arrayCount - 1) * alignedEltSize) + EltSize; - bOutOfBound = (offset + arraySize) > cbSize; + unsigned AlignedEltSize = ((EltSize + 15) & ~(0xf)); + unsigned ArraySize = ((ArrayCount - 1) * AlignedEltSize) + EltSize; + IsOutOfBound = (Offset + ArraySize) > CbSize; - if (!bOutOfBound) { + if (!IsOutOfBound) { // If we didn't care about gaps where elements could be placed with user // offsets, we could: recurse once if EltAnnotation, then allocate the - // rest if arrayCount > 1 + // rest if ArrayCount > 1 - unsigned arrayBase = base + offset; + unsigned ArrayBase = Base + Offset; if (!EltAnnotation) { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, arrayBase, - arrayBase + arraySize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, ArrayBase, + ArrayBase + ArraySize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(arrayBase)}); + {CbName, std::to_string(ArrayBase)}); } } else { - for (unsigned idx = 0; idx < arrayCount; idx++) { - CollectCBufferRanges(EltAnnotation, constAllocator, arrayBase, - typeSys, cbName, ValCtx); - arrayBase += alignedEltSize; + for (unsigned Idx = 0; Idx < ArrayCount; Idx++) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, ArrayBase, + TypeSys, CbName, ValCtx); + ArrayBase += AlignedEltSize; } } } } else { StructType *EltST = cast(EltTy); - unsigned structBase = base + offset; - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { + unsigned StructBase = Base + Offset; + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { if (DxilStructAnnotation *EltAnnotation = - typeSys.GetStructAnnotation(EltST)) { - CollectCBufferRanges(EltAnnotation, constAllocator, structBase, - typeSys, cbName, ValCtx); + TypeSys.GetStructAnnotation(EltST)) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, StructBase, + TypeSys, CbName, ValCtx); } else { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, structBase, - structBase + EltSize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, StructBase, + StructBase + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(structBase)}); + {CbName, std::to_string(StructBase)}); } } } } - if (bOutOfBound) { + if (IsOutOfBound) { ValCtx.EmitFormatError(ValidationRule::SmCBufferElementOverflow, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } -static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) { - Type *Ty = cb.GetHLSLType()->getPointerElementType(); - if (cb.GetRangeSize() != 1 || Ty->isArrayTy()) { +static void ValidateCBuffer(DxilCBuffer &Cb, ValidationContext &ValCtx) { + Type *Ty = Cb.GetHLSLType()->getPointerElementType(); + if (Cb.GetRangeSize() != 1 || Ty->isArrayTy()) { Ty = Ty->getArrayElementType(); } if (!isa(Ty)) { - ValCtx.EmitResourceError(&cb, + ValCtx.EmitResourceError(&Cb, ValidationRule::SmCBufferTemplateTypeMustBeStruct); return; } - if (cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { - ValCtx.EmitResourceFormatError(&cb, ValidationRule::SmCBufferSize, - {std::to_string(cb.GetSize())}); + if (Cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { + ValCtx.EmitResourceFormatError(&Cb, ValidationRule::SmCBufferSize, + {std::to_string(Cb.GetSize())}); return; } StructType *ST = cast(Ty); - DxilTypeSystem &typeSys = ValCtx.DxilMod.GetTypeSystem(); - DxilStructAnnotation *annotation = typeSys.GetStructAnnotation(ST); - if (!annotation) + DxilTypeSystem &TypeSys = 
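
The cbuffer array arithmetic in this hunk follows the legacy constant-buffer layout: every array element except the last is padded to a 16-byte register boundary, and the whole array (at its field offset) must fit inside the declared cbuffer size. A small sketch of that math, assuming ElementCount is at least 1:

#include <cstdint>

struct ArrayLayout {
  uint64_t AlignedElementSize; // element size rounded up to 16 bytes
  uint64_t TotalSize;          // (count - 1) padded elements + unpadded tail
};

ArrayLayout LayoutCBufferArray(uint64_t ElementSize, uint64_t ElementCount) {
  ArrayLayout L;
  L.AlignedElementSize = (ElementSize + 15) & ~uint64_t(0xf);
  L.TotalSize = (ElementCount - 1) * L.AlignedElementSize + ElementSize;
  return L;
}

// Mirrors the SmCBufferElementOverflow bound applied above.
bool FitsInCBuffer(uint64_t Offset, uint64_t ElementSize, uint64_t ElementCount,
                   uint64_t CBufferSize) {
  return Offset + LayoutCBufferArray(ElementSize, ElementCount).TotalSize <=
         CBufferSize;
}
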
ValCtx.DxilMod.GetTypeSystem(); + DxilStructAnnotation *Annotation = TypeSys.GetStructAnnotation(ST); + if (!Annotation) return; // Collect constant ranges. - std::vector> constRanges; - SpanAllocator constAllocator( + std::vector> ConstRanges; + SpanAllocator ConstAllocator( 0, // 4096 * 16 bytes. DXIL::kMaxCBufferSize << 4); - CollectCBufferRanges(annotation, constAllocator, 0, typeSys, - ValCtx.GetResourceName(&cb), ValCtx); + CollectCBufferRanges(Annotation, ConstAllocator, 0, TypeSys, + ValCtx.GetResourceName(&Cb), ValCtx); } static void ValidateResources(ValidationContext &ValCtx) { - const vector> &uavs = ValCtx.DxilMod.GetUAVs(); - SpacesAllocator uavAllocator; + const vector> &Uavs = ValCtx.DxilMod.GetUAVs(); + SpacesAllocator UavAllocator; - for (auto &uav : uavs) { - if (uav->IsROV()) { + for (auto &Uav : Uavs) { + if (Uav->IsROV()) { if (!ValCtx.DxilMod.GetShaderModel()->IsPS() && !ValCtx.isLibProfile) { - ValCtx.EmitResourceError(uav.get(), ValidationRule::SmROVOnlyInPS); + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmROVOnlyInPS); } } - switch (uav->GetKind()) { + switch (Uav->GetKind()) { case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - ValCtx.EmitResourceError(uav.get(), + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmInvalidTextureKindOnUAV); break; default: break; } - if (uav->HasCounter() && !uav->IsStructuredBuffer()) { - ValCtx.EmitResourceError(uav.get(), + if (Uav->HasCounter() && !Uav->IsStructuredBuffer()) { + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (uav->HasCounter() && uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(uav.get(), + if (Uav->HasCounter() && Uav->IsGloballyCoherent()) + ValCtx.EmitResourceFormatError(Uav.get(), ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(uav.get())}); + {ValCtx.GetResourceName(Uav.get())}); - ValidateResource(*uav, ValCtx); - ValidateResourceOverlap(*uav, uavAllocator, ValCtx); + ValidateResource(*Uav, ValCtx); + ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); } - SpacesAllocator srvAllocator; - const vector> &srvs = ValCtx.DxilMod.GetSRVs(); - for (auto &srv : srvs) { + SpacesAllocator SrvAllocator; + const vector> &Srvs = ValCtx.DxilMod.GetSRVs(); + for (auto &srv : Srvs) { ValidateResource(*srv, ValCtx); - ValidateResourceOverlap(*srv, srvAllocator, ValCtx); + ValidateResourceOverlap(*srv, SrvAllocator, ValCtx); } - hlsl::DxilResourceBase *pNonDense; - if (!AreDxilResourcesDense(&ValCtx.M, &pNonDense)) { - ValCtx.EmitResourceError(pNonDense, ValidationRule::MetaDenseResIDs); + hlsl::DxilResourceBase *NonDenseRes; + if (!AreDxilResourcesDense(&ValCtx.M, &NonDenseRes)) { + ValCtx.EmitResourceError(NonDenseRes, ValidationRule::MetaDenseResIDs); } - SpacesAllocator samplerAllocator; + SpacesAllocator SamplerAllocator; for (auto &sampler : ValCtx.DxilMod.GetSamplers()) { if (sampler->GetSamplerKind() == DXIL::SamplerKind::Invalid) { ValCtx.EmitResourceError(sampler.get(), ValidationRule::MetaValidSamplerMode); } - ValidateResourceOverlap(*sampler, samplerAllocator, ValCtx); + ValidateResourceOverlap(*sampler, SamplerAllocator, ValCtx); } - SpacesAllocator cbufferAllocator; + SpacesAllocator CbufferAllocator; for (auto &cbuffer : ValCtx.DxilMod.GetCBuffers()) { ValidateCBuffer(*cbuffer, ValCtx); - ValidateResourceOverlap(*cbuffer, cbufferAllocator, ValCtx); + ValidateResourceOverlap(*cbuffer, CbufferAllocator, ValCtx); } } static void ValidateShaderFlags(ValidationContext &ValCtx) { - ShaderFlags 
calcFlags; - ValCtx.DxilMod.CollectShaderFlagsForModule(calcFlags); + ShaderFlags CalcFlags; + ValCtx.DxilMod.CollectShaderFlagsForModule(CalcFlags); // Special case for validator version prior to 1.8. // If DXR 1.1 flag is set, but our computed flags do not have this set, then // this is due to prior versions setting the flag based on DXR 1.1 subobjects, // which are gone by this point. Set the flag and the rest should match. - unsigned valMajor, valMinor; - ValCtx.DxilMod.GetValidatorVersion(valMajor, valMinor); - if (DXIL::CompareVersions(valMajor, valMinor, 1, 5) >= 0 && - DXIL::CompareVersions(valMajor, valMinor, 1, 8) < 0 && + unsigned ValMajor, ValMinor; + ValCtx.DxilMod.GetValidatorVersion(ValMajor, ValMinor); + if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 5) >= 0 && + DXIL::CompareVersions(ValMajor, ValMinor, 1, 8) < 0 && ValCtx.DxilMod.m_ShaderFlags.GetRaytracingTier1_1() && - !calcFlags.GetRaytracingTier1_1()) { - calcFlags.SetRaytracingTier1_1(true); + !CalcFlags.GetRaytracingTier1_1()) { + CalcFlags.SetRaytracingTier1_1(true); } - const uint64_t mask = ShaderFlags::GetShaderFlagsRawForCollection(); - uint64_t declaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); - uint64_t calcFlagsRaw = calcFlags.GetShaderFlagsRaw(); + const uint64_t Mask = ShaderFlags::GetShaderFlagsRawForCollection(); + uint64_t DeclaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); + uint64_t CalcFlagsRaw = CalcFlags.GetShaderFlagsRaw(); - declaredFlagsRaw &= mask; - calcFlagsRaw &= mask; + DeclaredFlagsRaw &= Mask; + CalcFlagsRaw &= Mask; - if (declaredFlagsRaw == calcFlagsRaw) { + if (DeclaredFlagsRaw == CalcFlagsRaw) { return; } ValCtx.EmitError(ValidationRule::MetaFlagsUsage); dxilutil::EmitNoteOnContext(ValCtx.M.getContext(), Twine("Flags declared=") + - Twine(declaredFlagsRaw) + Twine(", actual=") + - Twine(calcFlagsRaw)); + Twine(DeclaredFlagsRaw) + Twine(", actual=") + + Twine(CalcFlagsRaw)); } static void ValidateSignatureElement(DxilSignatureElement &SE, ValidationContext &ValCtx) { - DXIL::SemanticKind semanticKind = SE.GetSemantic()->GetKind(); - CompType::Kind compKind = SE.GetCompType().GetKind(); + DXIL::SemanticKind SemanticKind = SE.GetSemantic()->GetKind(); + CompType::Kind CompKind = SE.GetCompType().GetKind(); DXIL::InterpolationMode Mode = SE.GetInterpolationMode()->GetKind(); StringRef Name = SE.GetName(); @@ -4032,86 +4032,86 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSemanticLen); } - if (semanticKind > DXIL::SemanticKind::Arbitrary && - semanticKind < DXIL::SemanticKind::Invalid) { - if (semanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { + if (SemanticKind > DXIL::SemanticKind::Arbitrary && + SemanticKind < DXIL::SemanticKind::Invalid) { + if (SemanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { ValCtx.EmitFormatError(ValidationRule::MetaSemaKindMatchesName, {SE.GetName(), SE.GetSemantic()->GetName()}); } } - unsigned compWidth = 0; - bool compFloat = false; - bool compInt = false; - bool compBool = false; + unsigned CompWidth = 0; + bool CompFloat = false; + bool CompInt = false; + bool CompBool = false; - switch (compKind) { + switch (CompKind) { case CompType::Kind::U64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; case CompType::Kind::I64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; // These should be translated for signatures: // case CompType::Kind::PackedS8x32: // case 
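
The shader-flags comparison above only considers the bits the validator actually collects, so both the declared and the recomputed flag words are masked before the equality check (with the special pre-1.8 raytracing-tier carve-out applied first). A one-function sketch of that comparison, with CollectionMask standing in for ShaderFlags::GetShaderFlagsRawForCollection():

#include <cstdint>

// True when declared and computed flags agree on every collected bit,
// corresponding to the MetaFlagsUsage check above.
bool ShaderFlagsMatch(uint64_t DeclaredRaw, uint64_t ComputedRaw,
                      uint64_t CollectionMask) {
  return (DeclaredRaw & CollectionMask) == (ComputedRaw & CollectionMask);
}
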
CompType::Kind::PackedU8x32: case CompType::Kind::U32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::I32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::U16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I1: - compWidth = 1; - compBool = true; + CompWidth = 1; + CompBool = true; break; case CompType::Kind::F64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::F32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::F16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::SNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::SNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::SNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::UNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::UNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::UNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::Invalid: default: @@ -4120,7 +4120,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; } - if (compInt || compBool) { + if (CompInt || CompBool) { switch (Mode) { case DXIL::InterpolationMode::Linear: case DXIL::InterpolationMode::LinearCentroid: @@ -4137,91 +4137,91 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } // Elements that should not appear in the Dxil signature: - bool bAllowedInSig = true; - bool bShouldBeAllocated = true; + bool AllowedInSig = true; + bool ShouldBeAllocated = true; switch (SE.GetInterpretation()) { case DXIL::SemanticInterpretationKind::NA: case DXIL::SemanticInterpretationKind::NotInSig: case DXIL::SemanticInterpretationKind::Invalid: - bAllowedInSig = false; + AllowedInSig = false; LLVM_FALLTHROUGH; case DXIL::SemanticInterpretationKind::NotPacked: case DXIL::SemanticInterpretationKind::Shadow: - bShouldBeAllocated = false; + ShouldBeAllocated = false; break; default: break; } - const char *inputOutput = nullptr; + const char *InputOutput = nullptr; if (SE.IsInput()) - inputOutput = "Input"; + InputOutput = "Input"; else if (SE.IsOutput()) - inputOutput = "Output"; + InputOutput = "Output"; else - inputOutput = "PatchConstant"; + InputOutput = "PatchConstant"; - if (!bAllowedInSig) { + if (!AllowedInSig) { ValCtx.EmitFormatError(ValidationRule::SmSemantic, {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName(), - inputOutput}); - } else if (bShouldBeAllocated && !SE.IsAllocated()) { + InputOutput}); + } else if (ShouldBeAllocated && !SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated, - {inputOutput, SE.GetName()}); - } else if (!bShouldBeAllocated && SE.IsAllocated()) { + {InputOutput, SE.GetName()}); + } else if (!ShouldBeAllocated && SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldNotBeAllocated, - {inputOutput, SE.GetName()}); + {InputOutput, SE.GetName()}); } - bool bIsClipCull = false; 
- bool bIsTessfactor = false; - bool bIsBarycentric = false; + bool IsClipCull = false; + bool IsTessfactor = false; + bool IsBarycentric = false; - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Depth: case DXIL::SemanticKind::DepthGreaterEqual: case DXIL::SemanticKind::DepthLessEqual: - if (!compFloat || compWidth > 32 || SE.GetCols() != 1) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } break; case DXIL::SemanticKind::Coverage: - DXASSERT(!SE.IsInput() || !bAllowedInSig, + DXASSERT(!SE.IsInput() || !AllowedInSig, "else internal inconsistency between semantic interpretation " "table and validation code"); LLVM_FALLTHROUGH; case DXIL::SemanticKind::InnerCoverage: case DXIL::SemanticKind::OutputControlPointID: - if (compKind != CompType::Kind::U32 || SE.GetCols() != 1) { + if (CompKind != CompType::Kind::U32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::Position: - if (!compFloat || compWidth > 32 || SE.GetCols() != 4) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 4) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float4"}); } break; case DXIL::SemanticKind::Target: - if (compWidth > 32) { + if (CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float/int/uint"}); } break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: - bIsClipCull = true; - if (!compFloat || compWidth > 32) { + IsClipCull = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } // NOTE: clip cull distance size is checked at ValidateSignature. break; case DXIL::SemanticKind::IsFrontFace: { - if (!(compInt && compWidth == 32) || SE.GetCols() != 1) { + if (!(CompInt && CompWidth == 32) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } @@ -4235,14 +4235,14 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::SampleIndex: case DXIL::SemanticKind::StencilRef: case DXIL::SemanticKind::ShadingRate: - if ((compKind != CompType::Kind::U32 && compKind != CompType::Kind::U16) || + if ((CompKind != CompType::Kind::U32 && CompKind != CompType::Kind::U16) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::CullPrimitive: { - if (!(compBool && compWidth == 1) || SE.GetCols() != 1) { + if (!(CompBool && CompWidth == 1) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "bool"}); } @@ -4250,8 +4250,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::TessFactor: case DXIL::SemanticKind::InsideTessFactor: // NOTE: the size check is at CheckPatchConstantSemantic. 
- bIsTessfactor = true; - if (!compFloat || compWidth > 32) { + IsTessfactor = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4260,12 +4260,12 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; case DXIL::SemanticKind::DomainLocation: case DXIL::SemanticKind::Invalid: - DXASSERT(!bAllowedInSig, "else internal inconsistency between semantic " - "interpretation table and validation code"); + DXASSERT(!AllowedInSig, "else internal inconsistency between semantic " + "interpretation table and validation code"); break; case DXIL::SemanticKind::Barycentrics: - bIsBarycentric = true; - if (!compFloat || compWidth > 32) { + IsBarycentric = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4310,32 +4310,32 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } } - if (semanticKind == DXIL::SemanticKind::Target) { - // Verify packed row == semantic index - unsigned row = SE.GetStartRow(); + if (SemanticKind == DXIL::SemanticKind::Target) { + // Verify packed Row == semantic index + unsigned Row = SE.GetStartRow(); for (unsigned i : SE.GetSemanticIndexVec()) { - if (row != i) { + if (Row != i) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetIndexMatchesRow); } - ++row; + ++Row; } - // Verify packed col is 0 + // Verify packed Col is 0 if (SE.GetStartCol() != 0) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetCol0); } - // Verify max row used < 8 + // Verify max Row used < 8 if (SE.GetStartRow() + SE.GetRows() > 8) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {"SV_Target", "7"}); } - } else if (bAllowedInSig && semanticKind != DXIL::SemanticKind::Arbitrary) { - if (bIsBarycentric) { + } else if (AllowedInSig && SemanticKind != DXIL::SemanticKind::Arbitrary) { + if (IsBarycentric) { if (SE.GetSemanticStartIndex() > 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "1"}); } - } else if (!bIsClipCull && SE.GetSemanticStartIndex() > 0) { + } else if (!IsClipCull && SE.GetSemanticStartIndex() > 0) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "0"}); } @@ -4343,17 +4343,17 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, // with the exception of tessfactors, which are validated in // CheckPatchConstantSemantic and ClipDistance/CullDistance, which have // other custom constraints. - if (!bIsTessfactor && !bIsClipCull && SE.GetRows() > 1) { + if (!IsTessfactor && !IsClipCull && SE.GetRows() > 1) { ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSystemValueRows); } } if (SE.GetCols() + (SE.IsAllocated() ? 
SE.GetStartCol() : 0) > 4) { - unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols(); + unsigned Size = (SE.GetRows() - 1) * 4 + SE.GetCols(); ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange, {SE.GetName(), std::to_string(SE.GetStartRow()), std::to_string(SE.GetStartCol()), - std::to_string(size)}); + std::to_string(Size)}); } if (!SE.GetInterpolationMode()->IsValid()) { @@ -4362,8 +4362,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } static void ValidateSignatureOverlap(DxilSignatureElement &E, - unsigned maxScalars, - DxilSignatureAllocator &allocator, + unsigned MaxScalars, + DxilSignatureAllocator &Allocator, ValidationContext &ValCtx) { // Skip entries that are not or should not be allocated. Validation occurs in @@ -4381,16 +4381,16 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, break; } - DxilPackElement PE(&E, allocator.UseMinPrecision()); - DxilSignatureAllocator::ConflictType conflict = - allocator.DetectRowConflict(&PE, E.GetStartRow()); - if (conflict == DxilSignatureAllocator::kNoConflict || - conflict == DxilSignatureAllocator::kInsufficientFreeComponents) - conflict = - allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); - switch (conflict) { + DxilPackElement PE(&E, Allocator.UseMinPrecision()); + DxilSignatureAllocator::ConflictType Conflict = + Allocator.DetectRowConflict(&PE, E.GetStartRow()); + if (Conflict == DxilSignatureAllocator::kNoConflict || + Conflict == DxilSignatureAllocator::kInsufficientFreeComponents) + Conflict = + Allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); + switch (Conflict) { case DxilSignatureAllocator::kNoConflict: - allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); + Allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); break; case DxilSignatureAllocator::kConflictsWithIndexed: ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict, @@ -4452,59 +4452,59 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, } static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, - EntryStatus &Status, unsigned maxScalars) { - DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = { + EntryStatus &Status, unsigned MaxScalars) { + DxilSignatureAllocator Allocator[DXIL::kNumOutputStreams] = { {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}}; - unordered_set semanticUsageSet[DXIL::kNumOutputStreams]; - StringMap> semanticIndexMap[DXIL::kNumOutputStreams]; - unordered_set clipcullRowSet[DXIL::kNumOutputStreams]; - unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; + unordered_set SemanticUsageSet[DXIL::kNumOutputStreams]; + StringMap> SemanticIndexMap[DXIL::kNumOutputStreams]; + unordered_set ClipcullRowSet[DXIL::kNumOutputStreams]; + unsigned ClipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; - bool isOutput = S.IsOutput(); + bool IsOutput = S.IsOutput(); unsigned TargetMask = 0; DXIL::SemanticKind DepthKind = DXIL::SemanticKind::Invalid; - const InterpolationMode *prevBaryInterpMode = nullptr; - unsigned numBarycentrics = 0; + const InterpolationMode *PrevBaryInterpMode = nullptr; + unsigned NumBarycentrics = 0; for (auto &E : S.GetElements()) { - DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind(); + DXIL::SemanticKind SemanticKind = E->GetSemantic()->GetKind(); ValidateSignatureElement(*E, ValCtx); - // Avoid OOB indexing on streamId. 
- unsigned streamId = E->GetOutputStream(); - if (streamId >= DXIL::kNumOutputStreams || !isOutput || + // Avoid OOB indexing on StreamId. + unsigned StreamId = E->GetOutputStream(); + if (StreamId >= DXIL::kNumOutputStreams || !IsOutput || !ValCtx.DxilMod.GetShaderModel()->IsGS()) { - streamId = 0; + StreamId = 0; } // Semantic index overlap check, keyed by name. - std::string nameUpper(E->GetName()); - std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), + std::string NameUpper(E->GetName()); + std::transform(NameUpper.begin(), NameUpper.end(), NameUpper.begin(), ::toupper); - unordered_set &semIdxSet = semanticIndexMap[streamId][nameUpper]; - for (unsigned semIdx : E->GetSemanticIndexVec()) { - if (semIdxSet.count(semIdx) > 0) { + unordered_set &SemIdxSet = SemanticIndexMap[StreamId][NameUpper]; + for (unsigned SemIdx : E->GetSemanticIndexVec()) { + if (SemIdxSet.count(SemIdx) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap, - {E->GetName(), std::to_string(semIdx)}); + {E->GetName(), std::to_string(SemIdx)}); return; } else - semIdxSet.insert(semIdx); + SemIdxSet.insert(SemIdx); } // SV_Target has special rules - if (semanticKind == DXIL::SemanticKind::Target) { + if (SemanticKind == DXIL::SemanticKind::Target) { // Validate target overlap if (E->GetStartRow() + E->GetRows() <= 8) { - unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); - if (TargetMask & mask) { + unsigned Mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); + if (TargetMask & Mask) { ValCtx.EmitFormatError( ValidationRule::MetaNoSemanticOverlap, {"SV_Target", std::to_string(E->GetStartRow())}); } - TargetMask = TargetMask | mask; + TargetMask = TargetMask | Mask; } if (E->GetRows() > 1) { ValCtx.EmitSignatureError(E.get(), ValidationRule::SmNoPSOutputIdx); @@ -4516,19 +4516,19 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, continue; // validate system value semantic rules - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Arbitrary: break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: // Validate max 8 components across 2 rows (registers) - for (unsigned rowIdx = 0; rowIdx < E->GetRows(); rowIdx++) - clipcullRowSet[streamId].insert(E->GetStartRow() + rowIdx); - if (clipcullRowSet[streamId].size() > 2) { + for (unsigned RowIdx = 0; RowIdx < E->GetRows(); RowIdx++) + ClipcullRowSet[StreamId].insert(E->GetStartRow() + RowIdx); + if (ClipcullRowSet[StreamId].size() > 2) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxRows); } - clipcullComponents[streamId] += E->GetCols(); - if (clipcullComponents[streamId] > 8) { + ClipcullComponents[StreamId] += E->GetCols(); + if (ClipcullComponents[StreamId] > 8) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxComponents); } @@ -4540,58 +4540,58 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, ValCtx.EmitSignatureError(E.get(), ValidationRule::SmPSMultipleDepthSemantic); } - DepthKind = semanticKind; + DepthKind = SemanticKind; break; case DXIL::SemanticKind::Barycentrics: { // There can only be up to two SV_Barycentrics // with differeent perspective interpolation modes. 
- if (numBarycentrics++ > 1) { + if (NumBarycentrics++ > 1) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); break; } - const InterpolationMode *mode = E->GetInterpolationMode(); - if (prevBaryInterpMode) { - if ((mode->IsAnyNoPerspective() && - prevBaryInterpMode->IsAnyNoPerspective()) || - (!mode->IsAnyNoPerspective() && - !prevBaryInterpMode->IsAnyNoPerspective())) { + const InterpolationMode *Mode = E->GetInterpolationMode(); + if (PrevBaryInterpMode) { + if ((Mode->IsAnyNoPerspective() && + PrevBaryInterpMode->IsAnyNoPerspective()) || + (!Mode->IsAnyNoPerspective() && + !PrevBaryInterpMode->IsAnyNoPerspective())) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); } } - prevBaryInterpMode = mode; + PrevBaryInterpMode = Mode; break; } default: - if (semanticUsageSet[streamId].count( - static_cast(semanticKind)) > 0) { + if (SemanticUsageSet[StreamId].count( + static_cast(SemanticKind)) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue, {E->GetSemantic()->GetName()}); } - semanticUsageSet[streamId].insert(static_cast(semanticKind)); + SemanticUsageSet[StreamId].insert(static_cast(SemanticKind)); break; } // Packed element overlap check. - ValidateSignatureOverlap(*E.get(), maxScalars, allocator[streamId], ValCtx); + ValidateSignatureOverlap(*E.get(), MaxScalars, Allocator[StreamId], ValCtx); - if (isOutput && semanticKind == DXIL::SemanticKind::Position) { + if (IsOutput && SemanticKind == DXIL::SemanticKind::Position) { Status.hasOutputPosition[E->GetOutputStream()] = true; } } if (Status.hasViewID && S.IsInput() && ValCtx.DxilMod.GetShaderModel()->GetKind() == DXIL::ShaderKind::Pixel) { - // Ensure sufficient space for ViewID: - DxilSignatureAllocator::DummyElement viewID; - viewID.rows = 1; - viewID.cols = 1; - viewID.kind = DXIL::SemanticKind::Arbitrary; - viewID.interpolation = DXIL::InterpolationMode::Constant; - viewID.interpretation = DXIL::SemanticInterpretationKind::SGV; - allocator[0].PackNext(&viewID, 0, 32); - if (!viewID.IsAllocated()) { + // Ensure sufficient space for ViewId: + DxilSignatureAllocator::DummyElement ViewId; + ViewId.rows = 1; + ViewId.cols = 1; + ViewId.kind = DXIL::SemanticKind::Arbitrary; + ViewId.interpolation = DXIL::InterpolationMode::Constant; + ViewId.interpretation = DXIL::SemanticInterpretationKind::SGV; + Allocator[0].PackNext(&ViewId, 0, 32); + if (!ViewId.IsAllocated()) { ValCtx.EmitError(ValidationRule::SmViewIDNeedsSlot); } } @@ -4616,12 +4616,12 @@ static void ValidateConstantInterpModeSignature(ValidationContext &ValCtx, } static void ValidateEntrySignatures(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function &F) { - const DxilFunctionProps &props = entryProps.props; - const DxilEntrySignature &S = entryProps.sig; + const DxilFunctionProps &Props = EntryProps.props; + const DxilEntrySignature &S = EntryProps.sig; - if (props.IsRay()) { + if (Props.IsRay()) { // No signatures allowed if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || @@ -4631,62 +4631,62 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate payload/attribute/params sizes - unsigned payloadSize = 0; - unsigned attrSize = 0; - auto itPayload = F.arg_begin(); - auto itAttr = itPayload; - if (itAttr != F.arg_end()) - itAttr++; + unsigned PayloadSize = 0; + unsigned AttrSize = 0; + auto ItPayload = F.arg_begin(); + auto ItAttr = ItPayload; + if 
(ItAttr != F.arg_end()) + ItAttr++; DataLayout DL(F.getParent()); - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - if (itAttr != F.arg_end()) { - Type *Ty = itAttr->getType(); + if (ItAttr != F.arg_end()) { + Type *Ty = ItAttr->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - attrSize = + AttrSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } LLVM_FALLTHROUGH; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - if (itPayload != F.arg_end()) { - Type *Ty = itPayload->getType(); + if (ItPayload != F.arg_end()) { + Type *Ty = ItPayload->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - payloadSize = + PayloadSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } break; } - if (props.ShaderProps.Ray.payloadSizeInBytes < payloadSize) { + if (Props.ShaderProps.Ray.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmRayShaderPayloadSize, - {F.getName(), props.IsCallable() ? "params" : "payload"}); + {F.getName(), Props.IsCallable() ? "params" : "payload"}); } - if (props.ShaderProps.Ray.attributeSizeInBytes < attrSize) { + if (Props.ShaderProps.Ray.attributeSizeInBytes < AttrSize) { ValCtx.EmitFnFormatError(&F, ValidationRule::SmRayShaderPayloadSize, {F.getName(), "attribute"}); } return; } - bool isPS = props.IsPS(); - bool isVS = props.IsVS(); - bool isGS = props.IsGS(); - bool isCS = props.IsCS(); - bool isMS = props.IsMS(); + bool IsPs = Props.IsPS(); + bool IsVs = Props.IsVS(); + bool IsGs = Props.IsGS(); + bool IsCs = Props.IsCS(); + bool IsMs = Props.IsMS(); - if (isPS) { + if (IsPs) { // PS output no interp mode. ValidateNoInterpModeSignature(ValCtx, S.OutputSignature); - } else if (isVS) { + } else if (IsVs) { // VS input no interp mode. ValidateNoInterpModeSignature(ValCtx, S.InputSignature); } - if (isMS) { + if (IsMs) { // primitive output constant interp mode. 
ValidateConstantInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } else { @@ -4694,38 +4694,38 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, ValidateNoInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } - unsigned maxInputScalars = DXIL::kMaxInputTotalScalars; - unsigned maxOutputScalars = 0; - unsigned maxPatchConstantScalars = 0; + unsigned MaxInputScalars = DXIL::kMaxInputTotalScalars; + unsigned MaxOutputScalars = 0; + unsigned MaxPatchConstantScalars = 0; - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::Compute: break; case DXIL::ShaderKind::Vertex: case DXIL::ShaderKind::Geometry: case DXIL::ShaderKind::Pixel: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Hull: case DXIL::ShaderKind::Domain: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; break; case DXIL::ShaderKind::Mesh: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Amplification: default: break; } - ValidateSignature(ValCtx, S.InputSignature, Status, maxInputScalars); - ValidateSignature(ValCtx, S.OutputSignature, Status, maxOutputScalars); + ValidateSignature(ValCtx, S.InputSignature, Status, MaxInputScalars); + ValidateSignature(ValCtx, S.OutputSignature, Status, MaxOutputScalars); ValidateSignature(ValCtx, S.PatchConstOrPrimSignature, Status, - maxPatchConstantScalars); + MaxPatchConstantScalars); - if (isPS) { + if (IsPs) { // Gather execution information. hlsl::PSExecutionInfo PSExec; DxilSignatureElement *PosInterpSE = nullptr; @@ -4767,10 +4767,10 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate PS output semantic. 
- const DxilSignature &outputSig = S.OutputSignature; - for (auto &SE : outputSig.GetElements()) { - Semantic::Kind semanticKind = SE->GetSemantic()->GetKind(); - switch (semanticKind) { + const DxilSignature &OutputSig = S.OutputSignature; + for (auto &SE : OutputSig.GetElements()) { + Semantic::Kind SemanticKind = SE->GetSemantic()->GetKind(); + switch (SemanticKind) { case Semantic::Kind::Target: case Semantic::Kind::Coverage: case Semantic::Kind::Depth: @@ -4786,24 +4786,24 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isGS) { - unsigned maxVertexCount = props.ShaderProps.GS.maxVertexCount; - unsigned outputScalarCount = 0; - const DxilSignature &outSig = S.OutputSignature; - for (auto &SE : outSig.GetElements()) { - outputScalarCount += SE->GetRows() * SE->GetCols(); + if (IsGs) { + unsigned MaxVertexCount = Props.ShaderProps.GS.maxVertexCount; + unsigned OutputScalarCount = 0; + const DxilSignature &OutSig = S.OutputSignature; + for (auto &SE : OutSig.GetElements()) { + OutputScalarCount += SE->GetRows() * SE->GetCols(); } - unsigned totalOutputScalars = maxVertexCount * outputScalarCount; - if (totalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { + unsigned TotalOutputScalars = MaxVertexCount * OutputScalarCount; + if (TotalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmGSTotalOutputVertexDataRange, - {std::to_string(maxVertexCount), std::to_string(outputScalarCount), - std::to_string(totalOutputScalars), + {std::to_string(MaxVertexCount), std::to_string(OutputScalarCount), + std::to_string(TotalOutputScalars), std::to_string(DXIL::kMaxGSOutputTotalScalars)}); } } - if (isCS) { + if (IsCs) { if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || !S.PatchConstOrPrimSignature.GetElements().empty()) { @@ -4811,7 +4811,7 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isMS) { + if (IsMs) { unsigned VertexSignatureRows = S.OutputSignature.GetRowCount(); if (VertexSignatureRows > DXIL::kMaxMSVSigRows) { ValCtx.EmitFnFormatError( @@ -4833,31 +4833,31 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, const unsigned kScalarSizeForMSAttributes = 4; #define ALIGN32(n) (((n) + 31) & ~31) - unsigned maxAlign32VertexCount = - ALIGN32(props.ShaderProps.MS.maxVertexCount); - unsigned maxAlign32PrimitiveCount = - ALIGN32(props.ShaderProps.MS.maxPrimitiveCount); - unsigned totalOutputScalars = 0; + unsigned MaxAlign32VertexCount = + ALIGN32(Props.ShaderProps.MS.maxVertexCount); + unsigned MaxAlign32PrimitiveCount = + ALIGN32(Props.ShaderProps.MS.maxPrimitiveCount); + unsigned TotalOutputScalars = 0; for (auto &SE : S.OutputSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32VertexCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32VertexCount; } for (auto &SE : S.PatchConstOrPrimSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32PrimitiveCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32PrimitiveCount; } - if (totalOutputScalars * kScalarSizeForMSAttributes > + if (TotalOutputScalars * kScalarSizeForMSAttributes > DXIL::kMaxMSOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderOutputSize, {F.getName(), std::to_string(DXIL::kMaxMSOutputTotalBytes)}); } - unsigned totalInputOutputBytes = - totalOutputScalars * kScalarSizeForMSAttributes + - props.ShaderProps.MS.payloadSizeInBytes; - 
if (totalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { + unsigned TotalInputOutputBytes = + TotalOutputScalars * kScalarSizeForMSAttributes + + Props.ShaderProps.MS.payloadSizeInBytes; + if (TotalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderInOutSize, {F.getName(), std::to_string(DXIL::kMaxMSInputOutputTotalBytes)}); @@ -4870,9 +4870,9 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntrySignatures(ValCtx, entryProps, Status, F); + ValidateEntrySignatures(ValCtx, EntryProps, Status, F); } } } else { @@ -4883,8 +4883,8 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntrySignatures(ValCtx, entryProps, Status, *Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntrySignatures(ValCtx, EntryProps, Status, *Entry); } } @@ -4893,14 +4893,14 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { struct CompatibilityChecker { ValidationContext &ValCtx; Function *EntryFn; - const DxilFunctionProps &props; - DXIL::ShaderKind shaderKind; + const DxilFunctionProps &Props; + DXIL::ShaderKind ShaderKind; // These masks identify the potential conflict flags based on the entry // function's shader kind and properties when either UsesDerivatives or // RequiresGroup flags are set in ShaderCompatInfo. - uint32_t maskForDeriv = 0; - uint32_t maskForGroup = 0; + uint32_t MaskForDeriv = 0; + uint32_t MaskForGroup = 0; enum class ConflictKind : uint32_t { Stage, @@ -4922,77 +4922,77 @@ struct CompatibilityChecker { CompatibilityChecker(ValidationContext &ValCtx, Function *EntryFn) : ValCtx(ValCtx), EntryFn(EntryFn), - props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), - shaderKind(props.shaderKind) { + Props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), + ShaderKind(Props.shaderKind) { // Precompute potential incompatibilities based on shader stage, shader kind // and entry attributes. These will turn into full conflicts if the entry // point's shader flags indicate that they use relevant features. if (!ValCtx.DxilMod.GetShaderModel()->IsSM66Plus() && - (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute)) { - maskForDeriv |= + (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute)) { + MaskForDeriv |= static_cast(ConflictFlags::DerivInComputeShaderModel); - } else if (shaderKind == DXIL::ShaderKind::Node) { + } else if (ShaderKind == DXIL::ShaderKind::Node) { // Only broadcasting launch supports derivatives. - if (props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) - maskForDeriv |= static_cast(ConflictFlags::DerivLaunch); + if (Props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) + MaskForDeriv |= static_cast(ConflictFlags::DerivLaunch); // Thread launch node has no group. 
- if (props.Node.LaunchType == DXIL::NodeLaunchType::Thread) - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + if (Props.Node.LaunchType == DXIL::NodeLaunchType::Thread) + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } - if (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Node) { + if (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Node) { // All compute-like stages // Thread dimensions must be either 1D and X is multiple of 4, or 2D // and X and Y must be multiples of 2. - if (props.numThreads[1] == 1 && props.numThreads[2] == 1) { - if ((props.numThreads[0] & 0x3) != 0) - maskForDeriv |= + if (Props.numThreads[1] == 1 && Props.numThreads[2] == 1) { + if ((Props.numThreads[0] & 0x3) != 0) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); - } else if ((props.numThreads[0] & 0x1) || (props.numThreads[1] & 0x1)) - maskForDeriv |= + } else if ((Props.numThreads[0] & 0x1) || (Props.numThreads[1] & 0x1)) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); } else { // other stages have no group - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } } uint32_t - IdentifyConflict(const DxilModule::ShaderCompatInfo &compatInfo) const { - uint32_t conflictMask = 0; + IdentifyConflict(const DxilModule::ShaderCompatInfo &CompatInfo) const { + uint32_t ConflictMask = 0; // Compatibility check said this shader kind is not compatible. - if (0 == ((1 << (uint32_t)shaderKind) & compatInfo.mask)) - conflictMask |= (uint32_t)ConflictFlags::Stage; + if (0 == ((1 << (uint32_t)ShaderKind) & CompatInfo.mask)) + ConflictMask |= (uint32_t)ConflictFlags::Stage; // Compatibility check said this shader model is not compatible. if (DXIL::CompareVersions(ValCtx.DxilMod.GetShaderModel()->GetMajor(), ValCtx.DxilMod.GetShaderModel()->GetMinor(), - compatInfo.minMajor, compatInfo.minMinor) < 0) - conflictMask |= (uint32_t)ConflictFlags::ShaderModel; + CompatInfo.minMajor, CompatInfo.minMinor) < 0) + ConflictMask |= (uint32_t)ConflictFlags::ShaderModel; - if (compatInfo.shaderFlags.GetUsesDerivatives()) - conflictMask |= maskForDeriv; + if (CompatInfo.shaderFlags.GetUsesDerivatives()) + ConflictMask |= MaskForDeriv; - if (compatInfo.shaderFlags.GetRequiresGroup()) - conflictMask |= maskForGroup; + if (CompatInfo.shaderFlags.GetRequiresGroup()) + ConflictMask |= MaskForGroup; - return conflictMask; + return ConflictMask; } - void Diagnose(Function *F, uint32_t conflictMask, ConflictKind conflict, - ValidationRule rule, ArrayRef args = {}) { - if (conflictMask & (1 << (unsigned)conflict)) - ValCtx.EmitFnFormatError(F, rule, args); + void Diagnose(Function *F, uint32_t ConflictMask, ConflictKind Conflict, + ValidationRule Rule, ArrayRef Args = {}) { + if (ConflictMask & (1 << (unsigned)Conflict)) + ValCtx.EmitFnFormatError(F, Rule, Args); } - void DiagnoseConflicts(Function *F, uint32_t conflictMask) { + void DiagnoseConflicts(Function *F, uint32_t ConflictMask) { // Emit a diagnostic indicating that either the entry function or a function // called by the entry function contains a disallowed operation. 
if (F == EntryFn) @@ -5001,22 +5001,22 @@ struct CompatibilityChecker { ValCtx.EmitFnError(EntryFn, ValidationRule::SmIncompatibleCallInEntry); // Emit diagnostics for each conflict found in this function. - Diagnose(F, conflictMask, ConflictKind::Stage, + Diagnose(F, ConflictMask, ConflictKind::Stage, ValidationRule::SmIncompatibleStage, - {ShaderModel::GetKindName(props.shaderKind)}); - Diagnose(F, conflictMask, ConflictKind::ShaderModel, + {ShaderModel::GetKindName(Props.shaderKind)}); + Diagnose(F, ConflictMask, ConflictKind::ShaderModel, ValidationRule::SmIncompatibleShaderModel); - Diagnose(F, conflictMask, ConflictKind::DerivLaunch, + Diagnose(F, ConflictMask, ConflictKind::DerivLaunch, ValidationRule::SmIncompatibleDerivLaunch, - {GetLaunchTypeStr(props.Node.LaunchType)}); - Diagnose(F, conflictMask, ConflictKind::DerivThreadGroupDim, + {GetLaunchTypeStr(Props.Node.LaunchType)}); + Diagnose(F, ConflictMask, ConflictKind::DerivThreadGroupDim, ValidationRule::SmIncompatibleThreadGroupDim, - {std::to_string(props.numThreads[0]), - std::to_string(props.numThreads[1]), - std::to_string(props.numThreads[2])}); - Diagnose(F, conflictMask, ConflictKind::DerivInComputeShaderModel, + {std::to_string(Props.numThreads[0]), + std::to_string(Props.numThreads[1]), + std::to_string(Props.numThreads[2])}); + Diagnose(F, ConflictMask, ConflictKind::DerivInComputeShaderModel, ValidationRule::SmIncompatibleDerivInComputeShaderModel); - Diagnose(F, conflictMask, ConflictKind::RequiresGroup, + Diagnose(F, ConflictMask, ConflictKind::RequiresGroup, ValidationRule::SmIncompatibleRequiresGroup); } @@ -5025,59 +5025,59 @@ struct CompatibilityChecker { // functions called by that function introduced the conflict. // In those cases, the called functions themselves will emit the diagnostic. // Return conflict mask for this function. - uint32_t Visit(Function *F, uint32_t &remainingMask, - llvm::SmallPtrSet &visited, CallGraph &CG) { + uint32_t Visit(Function *F, uint32_t &RemainingMask, + llvm::SmallPtrSet &Visited, CallGraph &CG) { // Recursive check looks for where a conflict is found and not present // in functions called by the current function. // - When a source is found, emit diagnostics and clear the conflict // flags introduced by this function from the working mask so we don't // report this conflict again. - // - When the remainingMask is 0, we are done. + // - When the RemainingMask is 0, we are done. - if (remainingMask == 0) + if (RemainingMask == 0) return 0; // Nothing left to search for. - if (!visited.insert(F).second) + if (!Visited.insert(F).second) return 0; // Already visited. - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = ValCtx.DxilMod.GetCompatInfoForFunction(F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) return 0; - uint32_t maskForThisFunction = IdentifyConflict(*compatInfo); + uint32_t MaskForThisFunction = IdentifyConflict(*CompatInfo); - uint32_t maskForCalls = 0; + uint32_t MaskForCalls = 0; if (CallGraphNode *CGNode = CG[F]) { for (auto &Call : *CGNode) { Function *called = Call.second->getFunction(); if (called->isDeclaration()) continue; - maskForCalls |= Visit(called, remainingMask, visited, CG); - if (remainingMask == 0) + MaskForCalls |= Visit(called, RemainingMask, Visited, CG); + if (RemainingMask == 0) return 0; // Nothing left to search for. 
} } // Mask of incompatibilities introduced by this function. - uint32_t conflictsIntroduced = - remainingMask & maskForThisFunction & ~maskForCalls; - if (conflictsIntroduced) { + uint32_t ConflictsIntroduced = + RemainingMask & MaskForThisFunction & ~MaskForCalls; + if (ConflictsIntroduced) { // This function introduces at least one conflict. - DiagnoseConflicts(F, conflictsIntroduced); + DiagnoseConflicts(F, ConflictsIntroduced); // Mask off diagnosed incompatibilities. - remainingMask &= ~conflictsIntroduced; + RemainingMask &= ~ConflictsIntroduced; } - return maskForThisFunction; + return MaskForThisFunction; } - void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &compatInfo) { - uint32_t conflictMask = IdentifyConflict(compatInfo); - if (conflictMask == 0) + void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &CompatInfo) { + uint32_t ConflictMask = IdentifyConflict(CompatInfo); + if (ConflictMask == 0) return; CallGraph &CG = ValCtx.GetCallGraph(); - llvm::SmallPtrSet visited; - Visit(EntryFn, conflictMask, visited, CG); + llvm::SmallPtrSet Visited; + Visit(EntryFn, ConflictMask, Visited, CG); } }; @@ -5086,14 +5086,14 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { DxilModule &DM = ValCtx.DxilMod; for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = DM.GetCompatInfoForFunction(&F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) continue; CompatibilityChecker checker(ValCtx, &F); - checker.FindIncompatibleCall(*compatInfo); + checker.FindIncompatibleCall(*CompatInfo); } } } @@ -5101,101 +5101,101 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { static void CheckPatchConstantSemantic(ValidationContext &ValCtx, const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = EntryProps.props; - bool isHS = props.IsHS(); + const DxilFunctionProps &Props = EntryProps.props; + bool IsHs = Props.IsHS(); - DXIL::TessellatorDomain domain = - isHS ? props.ShaderProps.HS.domain : props.ShaderProps.DS.domain; + DXIL::TessellatorDomain Domain = + IsHs ? 
Props.ShaderProps.HS.domain : Props.ShaderProps.DS.domain; - const DxilSignature &patchConstantSig = + const DxilSignature &PatchConstantSig = EntryProps.sig.PatchConstOrPrimSignature; - const unsigned kQuadEdgeSize = 4; - const unsigned kQuadInsideSize = 2; - const unsigned kQuadDomainLocSize = 2; + const unsigned KQuadEdgeSize = 4; + const unsigned KQuadInsideSize = 2; + const unsigned KQuadDomainLocSize = 2; - const unsigned kTriEdgeSize = 3; - const unsigned kTriInsideSize = 1; - const unsigned kTriDomainLocSize = 3; + const unsigned KTriEdgeSize = 3; + const unsigned KTriInsideSize = 1; + const unsigned KTriDomainLocSize = 3; - const unsigned kIsolineEdgeSize = 2; - const unsigned kIsolineInsideSize = 0; - const unsigned kIsolineDomainLocSize = 3; + const unsigned KIsolineEdgeSize = 2; + const unsigned KIsolineInsideSize = 0; + const unsigned KIsolineDomainLocSize = 3; - const char *domainName = ""; + const char *DomainName = ""; DXIL::SemanticKind kEdgeSemantic = DXIL::SemanticKind::TessFactor; - unsigned edgeSize = 0; + unsigned EdgeSize = 0; DXIL::SemanticKind kInsideSemantic = DXIL::SemanticKind::InsideTessFactor; - unsigned insideSize = 0; + unsigned InsideSize = 0; Status.domainLocSize = 0; - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - domainName = "IsoLine"; - edgeSize = kIsolineEdgeSize; - insideSize = kIsolineInsideSize; - Status.domainLocSize = kIsolineDomainLocSize; + DomainName = "IsoLine"; + EdgeSize = KIsolineEdgeSize; + InsideSize = KIsolineInsideSize; + Status.domainLocSize = KIsolineDomainLocSize; break; case DXIL::TessellatorDomain::Tri: - domainName = "Tri"; - edgeSize = kTriEdgeSize; - insideSize = kTriInsideSize; - Status.domainLocSize = kTriDomainLocSize; + DomainName = "Tri"; + EdgeSize = KTriEdgeSize; + InsideSize = KTriInsideSize; + Status.domainLocSize = KTriDomainLocSize; break; case DXIL::TessellatorDomain::Quad: - domainName = "Quad"; - edgeSize = kQuadEdgeSize; - insideSize = kQuadInsideSize; - Status.domainLocSize = kQuadDomainLocSize; + DomainName = "Quad"; + EdgeSize = KQuadEdgeSize; + InsideSize = KQuadInsideSize; + Status.domainLocSize = KQuadDomainLocSize; break; default: // Don't bother with other tests if domain is invalid return; } - bool bFoundEdgeSemantic = false; - bool bFoundInsideSemantic = false; - for (auto &SE : patchConstantSig.GetElements()) { - Semantic::Kind kind = SE->GetSemantic()->GetKind(); - if (kind == kEdgeSemantic) { - bFoundEdgeSemantic = true; - if (SE->GetRows() != edgeSize || SE->GetCols() > 1) { + bool FoundEdgeSemantic = false; + bool FoundInsideSemantic = false; + for (auto &SE : PatchConstantSig.GetElements()) { + Semantic::Kind Kind = SE->GetSemantic()->GetKind(); + if (Kind == kEdgeSemantic) { + FoundEdgeSemantic = true; + if (SE->GetRows() != EdgeSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), - std::to_string(SE->GetCols()), domainName, - std::to_string(edgeSize)}); + std::to_string(SE->GetCols()), DomainName, + std::to_string(EdgeSize)}); } - } else if (kind == kInsideSemantic) { - bFoundInsideSemantic = true; - if (SE->GetRows() != insideSize || SE->GetCols() > 1) { + } else if (Kind == kInsideSemantic) { + FoundInsideSemantic = true; + if (SE->GetRows() != InsideSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::SmInsideTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), std::to_string(SE->GetCols()), - domainName, std::to_string(insideSize)}); + DomainName, 
std::to_string(InsideSize)}); } } } - if (isHS) { - if (!bFoundEdgeSemantic) { + if (IsHs) { + if (!FoundEdgeSemantic) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } - if (!bFoundInsideSemantic && domain != DXIL::TessellatorDomain::IsoLine) { + if (!FoundInsideSemantic && Domain != DXIL::TessellatorDomain::IsoLine) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } } } static void ValidatePassThruHS(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { + const DxilEntryProps &EntryProps, Function *F) { // Check pass thru HS. if (F->isDeclaration()) { - const auto &props = entryProps.props; - if (props.IsHS()) { - const auto &HS = props.ShaderProps.HS; + const auto &Props = EntryProps.props; + if (Props.IsHS()) { + const auto &HS = Props.ShaderProps.HS; if (HS.inputControlPoints < HS.outputControlPoints) { ValCtx.EmitFnError( F, ValidationRule::SmHullPassThruControlPointCountMatch); @@ -5203,12 +5203,12 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // Check declared control point outputs storage amounts are ok to pass // through (less output storage than input for control points). - const DxilSignature &outSig = entryProps.sig.OutputSignature; - unsigned totalOutputCPScalars = 0; - for (auto &SE : outSig.GetElements()) { - totalOutputCPScalars += SE->GetRows() * SE->GetCols(); + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + unsigned TotalOutputCpScalars = 0; + for (auto &SE : OutSig.GetElements()) { + TotalOutputCpScalars += SE->GetRows() * SE->GetCols(); } - if (totalOutputCPScalars * HS.outputControlPoints > + if (TotalOutputCpScalars * HS.outputControlPoints > DXIL::kMaxHSOutputControlPointsTotalScalars) { ValCtx.EmitFnError(F, ValidationRule::SmOutputControlPointsTotalScalars); @@ -5223,35 +5223,35 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // validate wave size (currently allowed only on CS and node shaders but might // be supported on other shader types in the future) static void ValidateWaveSize(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { - const DxilFunctionProps &props = entryProps.props; - const hlsl::DxilWaveSize &waveSize = props.WaveSize; + const DxilEntryProps &EntryProps, Function *F) { + const DxilFunctionProps &Props = EntryProps.props; + const hlsl::DxilWaveSize &WaveSize = Props.WaveSize; - switch (waveSize.Validate()) { + switch (WaveSize.Validate()) { case hlsl::DxilWaveSize::ValidationResult::Success: break; case hlsl::DxilWaveSize::ValidationResult::InvalidMin: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Min", std::to_string(waveSize.Min), + {"Min", std::to_string(WaveSize.Min), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidMax: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Max", std::to_string(waveSize.Max), + {"Max", std::to_string(WaveSize.Max), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidPreferred: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Preferred", std::to_string(waveSize.Preferred), + {"Preferred", std::to_string(WaveSize.Preferred), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxOrPreferredWhenUndefined: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeAllZeroWhenUndefined, - 
{std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxEqualsMin: // This case is allowed because users may disable the ErrorDefault warning. @@ -5259,227 +5259,227 @@ static void ValidateWaveSize(ValidationContext &ValCtx, case hlsl::DxilWaveSize::ValidationResult::PreferredWhenNoRange: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxAndPreferredZeroWhenNoRange, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxLessThanMin: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxGreaterThanMin, - {std::to_string(waveSize.Max), std::to_string(waveSize.Min)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Min)}); break; case hlsl::DxilWaveSize::ValidationResult::PreferredOutOfRange: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizePreferredInRange, - {std::to_string(waveSize.Preferred), - std::to_string(waveSize.Min), - std::to_string(waveSize.Max)}); + {std::to_string(WaveSize.Preferred), + std::to_string(WaveSize.Min), + std::to_string(WaveSize.Max)}); break; } // Check shader model and kind. - if (waveSize.IsDefined()) { - if (!props.IsCS() && !props.IsNode()) { + if (WaveSize.IsDefined()) { + if (!Props.IsCS() && !Props.IsNode()) { ValCtx.EmitFnError(F, ValidationRule::SmWaveSizeOnComputeOrNode); } } } static void ValidateEntryProps(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = entryProps.props; - DXIL::ShaderKind ShaderType = props.shaderKind; + const DxilFunctionProps &Props = EntryProps.props; + DXIL::ShaderKind ShaderType = Props.shaderKind; - ValidateWaveSize(ValCtx, entryProps, F); + ValidateWaveSize(ValCtx, EntryProps, F); - if (ShaderType == DXIL::ShaderKind::Compute || props.IsNode()) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + if (ShaderType == DXIL::ShaderKind::Compute || Props.IsNode()) { + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinCSThreadGroupX) || (x > DXIL::kMaxCSThreadGroupX)) { + if ((X < DXIL::kMinCSThreadGroupX) || (X > DXIL::kMaxCSThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinCSThreadGroupX), std::to_string(DXIL::kMaxCSThreadGroupX)}); } - if ((y < DXIL::kMinCSThreadGroupY) || (y > DXIL::kMaxCSThreadGroupY)) { + if ((Y < DXIL::kMinCSThreadGroupY) || (Y > DXIL::kMaxCSThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinCSThreadGroupY), std::to_string(DXIL::kMaxCSThreadGroupY)}); } - if ((z < DXIL::kMinCSThreadGroupZ) || (z > DXIL::kMaxCSThreadGroupZ)) { + if ((Z < DXIL::kMinCSThreadGroupZ) || (Z > DXIL::kMaxCSThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinCSThreadGroupZ), std::to_string(DXIL::kMaxCSThreadGroupZ)}); } - if (threadsInGroup > 
DXIL::kMaxCSThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxCSThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxCSThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. } else if (ShaderType == DXIL::ShaderKind::Mesh) { - const auto &MS = props.ShaderProps.MS; - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + const auto &MS = Props.ShaderProps.MS; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
- unsigned maxVertexCount = MS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxMSOutputVertexCount) { + unsigned MaxVertexCount = MS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxMSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMeshShaderMaxVertexCount, {std::to_string(DXIL::kMaxMSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned maxPrimitiveCount = MS.maxPrimitiveCount; - if (maxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { + unsigned MaxPrimitiveCount = MS.maxPrimitiveCount; + if (MaxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderMaxPrimitiveCount, {std::to_string(DXIL::kMaxMSOutputPrimitiveCount), - std::to_string(maxPrimitiveCount)}); + std::to_string(MaxPrimitiveCount)}); } } else if (ShaderType == DXIL::ShaderKind::Amplification) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
} else if (ShaderType == DXIL::ShaderKind::Domain) { - const auto &DS = props.ShaderProps.DS; - DXIL::TessellatorDomain domain = DS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = DS.inputControlPoints; + const auto &DS = Props.ShaderProps.DS; + DXIL::TessellatorDomain Domain = DS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = DS.inputControlPoints; - if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmDSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Hull) { - const auto &HS = props.ShaderProps.HS; - DXIL::TessellatorDomain domain = HS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = HS.inputControlPoints; - if (inputControlPointCount == 0) { - const DxilSignature &inputSig = entryProps.sig.InputSignature; - if (!inputSig.GetElements().empty()) { + const auto &HS = Props.ShaderProps.HS; + DXIL::TessellatorDomain Domain = HS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = HS.inputControlPoints; + if (InputControlPointCount == 0) { + const DxilSignature &InputSig = EntryProps.sig.InputSignature; + if (!InputSig.GetElements().empty()) { ValCtx.EmitFnError(F, ValidationRule::SmZeroHSInputControlPointWithInput); } - } else if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + } else if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmHSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - unsigned outputControlPointCount = HS.outputControlPoints; - if (outputControlPointCount < DXIL::kMinIAPatchControlPointCount || - outputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + unsigned OutputControlPointCount = HS.outputControlPoints; + if (OutputControlPointCount < DXIL::kMinIAPatchControlPointCount || + OutputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmOutputControlPointCountRange, {std::to_string(DXIL::kMinIAPatchControlPointCount), std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(outputControlPointCount)}); + std::to_string(OutputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - DXIL::TessellatorPartitioning partition = HS.partition; - if (partition == DXIL::TessellatorPartitioning::Undefined) { + DXIL::TessellatorPartitioning Partition = HS.partition; + if (Partition == DXIL::TessellatorPartitioning::Undefined) { 
ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorPartition); } - DXIL::TessellatorOutputPrimitive tessOutputPrimitive = HS.outputPrimitive; - if (tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || - tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { + DXIL::TessellatorOutputPrimitive TessOutputPrimitive = HS.outputPrimitive; + if (TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || + TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorOutputPrimitive); } - float maxTessFactor = HS.maxTessFactor; - if (maxTessFactor < DXIL::kHSMaxTessFactorLowerBound || - maxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { + float MaxTessFactor = HS.maxTessFactor; + if (MaxTessFactor < DXIL::kHSMaxTessFactorLowerBound || + MaxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { ValCtx.EmitFnFormatError( F, ValidationRule::MetaMaxTessFactor, {std::to_string(DXIL::kHSMaxTessFactorLowerBound), std::to_string(DXIL::kHSMaxTessFactorUpperBound), - std::to_string(maxTessFactor)}); + std::to_string(MaxTessFactor)}); } // Domain and OutPrimivtive match. - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::TriangleCW: case DXIL::TessellatorOutputPrimitive::TriangleCCW: ValCtx.EmitFnError(F, ValidationRule::SmIsoLineOutputPrimitiveMismatch); @@ -5489,7 +5489,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Tri: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5498,7 +5498,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Quad: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5511,39 +5511,39 @@ static void ValidateEntryProps(ValidationContext &ValCtx, break; } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Geometry) { - const auto &GS = props.ShaderProps.GS; - unsigned maxVertexCount = GS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxGSOutputVertexCount) { + const auto &GS = Props.ShaderProps.GS; + unsigned MaxVertexCount = GS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxGSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSOutputVertexCountRange, {std::to_string(DXIL::kMaxGSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned instanceCount = GS.instanceCount; - if (instanceCount > DXIL::kMaxGSInstanceCount || instanceCount < 1) { + unsigned InstanceCount = GS.instanceCount; + if (InstanceCount > DXIL::kMaxGSInstanceCount || InstanceCount < 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSInstanceCountRange, {std::to_string(DXIL::kMaxGSInstanceCount), - std::to_string(instanceCount)}); + std::to_string(InstanceCount)}); } - DXIL::PrimitiveTopology topo = DXIL::PrimitiveTopology::Undefined; - bool bTopoMismatch = false; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + DXIL::PrimitiveTopology Topo = 
DXIL::PrimitiveTopology::Undefined; + bool TopoMismatch = false; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - if (topo == DXIL::PrimitiveTopology::Undefined) - topo = GS.streamPrimitiveTopologies[i]; - else if (topo != GS.streamPrimitiveTopologies[i]) { - bTopoMismatch = true; + if (Topo == DXIL::PrimitiveTopology::Undefined) + Topo = GS.streamPrimitiveTopologies[I]; + else if (Topo != GS.streamPrimitiveTopologies[I]) { + TopoMismatch = true; break; } } } - if (bTopoMismatch) - topo = DXIL::PrimitiveTopology::Undefined; - switch (topo) { + if (TopoMismatch) + Topo = DXIL::PrimitiveTopology::Undefined; + switch (Topo) { case DXIL::PrimitiveTopology::PointList: case DXIL::PrimitiveTopology::LineStrip: case DXIL::PrimitiveTopology::TriangleStrip: @@ -5553,9 +5553,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; } - DXIL::InputPrimitive inputPrimitive = GS.inputPrimitive; - unsigned VertexCount = GetNumVertices(inputPrimitive); - if (VertexCount == 0 && inputPrimitive != DXIL::InputPrimitive::Undefined) { + DXIL::InputPrimitive InputPrimitive = GS.inputPrimitive; + unsigned VertexCount = GetNumVertices(InputPrimitive); + if (VertexCount == 0 && InputPrimitive != DXIL::InputPrimitive::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmGSValidInputPrimitive); } } @@ -5566,10 +5566,10 @@ static void ValidateShaderState(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntryProps(ValCtx, entryProps, Status, &F); - ValidatePassThruHS(ValCtx, entryProps, &F); + ValidateEntryProps(ValCtx, EntryProps, Status, &F); + ValidatePassThruHS(ValCtx, EntryProps, &F); } } } else { @@ -5580,33 +5580,33 @@ static void ValidateShaderState(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntryProps(ValCtx, entryProps, Status, Entry); - ValidatePassThruHS(ValCtx, entryProps, Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntryProps(ValCtx, EntryProps, Status, Entry); + ValidatePassThruHS(ValCtx, EntryProps, Entry); } } static CallGraphNode * -CalculateCallDepth(CallGraphNode *node, - std::unordered_map &depthMap, - std::unordered_set &callStack, - std::unordered_set &funcSet) { - unsigned depth = callStack.size(); - funcSet.insert(node->getFunction()); - for (auto it = node->begin(), ei = node->end(); it != ei; it++) { - CallGraphNode *toNode = it->second; - if (callStack.insert(toNode).second == false) { +CalculateCallDepth(CallGraphNode *Node, + std::unordered_map &DepthMap, + std::unordered_set &CallStack, + std::unordered_set &FuncSet) { + unsigned Depth = CallStack.size(); + FuncSet.insert(Node->getFunction()); + for (auto It = Node->begin(), EIt = Node->end(); It != EIt; It++) { + CallGraphNode *ToNode = It->second; + if (CallStack.insert(ToNode).second == false) { // Recursive. 
- return toNode; + return ToNode; } - if (depthMap[toNode] < depth) - depthMap[toNode] = depth; + if (DepthMap[ToNode] < Depth) + DepthMap[ToNode] = Depth; if (CallGraphNode *N = - CalculateCallDepth(toNode, depthMap, callStack, funcSet)) { + CalculateCallDepth(ToNode, DepthMap, CallStack, FuncSet)) { // Recursive return N; } - callStack.erase(toNode); + CallStack.erase(ToNode); } return nullptr; @@ -5616,29 +5616,29 @@ static void ValidateCallGraph(ValidationContext &ValCtx) { // Build CallGraph. CallGraph &CG = ValCtx.GetCallGraph(); - std::unordered_map depthMap; - std::unordered_set callStack; - CallGraphNode *entryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; - depthMap[entryNode] = 0; - if (CallGraphNode *N = CalculateCallDepth(entryNode, depthMap, callStack, + std::unordered_map DepthMap; + std::unordered_set CallStack; + CallGraphNode *EntryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; + DepthMap[EntryNode] = 0; + if (CallGraphNode *N = CalculateCallDepth(EntryNode, DepthMap, CallStack, ValCtx.entryFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); if (ValCtx.DxilMod.GetShaderModel()->IsHS()) { - CallGraphNode *patchConstantNode = + CallGraphNode *PatchConstantNode = CG[ValCtx.DxilMod.GetPatchConstantFunction()]; - depthMap[patchConstantNode] = 0; - callStack.clear(); + DepthMap[PatchConstantNode] = 0; + CallStack.clear(); if (CallGraphNode *N = - CalculateCallDepth(patchConstantNode, depthMap, callStack, + CalculateCallDepth(PatchConstantNode, DepthMap, CallStack, ValCtx.patchConstFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); } } static void ValidateFlowControl(ValidationContext &ValCtx) { - bool reducible = + bool Reducible = IsReducible(*ValCtx.DxilMod.GetModule(), IrreducibilityAction::Ignore); - if (!reducible) { + if (!Reducible) { ValCtx.EmitError(ValidationRule::FlowReducible); return; } @@ -5653,28 +5653,28 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { DominatorTree DT = DTA.run(F); LoopInfo LI; LI.Analyze(DT); - for (auto loopIt = LI.begin(); loopIt != LI.end(); loopIt++) { - Loop *loop = *loopIt; - SmallVector exitBlocks; - loop->getExitBlocks(exitBlocks); - if (exitBlocks.empty()) + for (auto LoopIt = LI.begin(); LoopIt != LI.end(); LoopIt++) { + Loop *Loop = *LoopIt; + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + if (ExitBlocks.empty()) ValCtx.EmitFnError(&F, ValidationRule::FlowDeadLoop); } // validate that there is no use of a value that has been output-completed // for this function. - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { - Function *pF = it.second; + for (auto &It : HlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function *pF = It.second; if (!pF) continue; // first, collect all the output complete calls that are not dominated // by another OutputComplete call for the same handle value llvm::SmallMapVector, 4> - handleToCI; + HandleToCI; for (User *U : pF->users()) { // all OutputComplete calls are instructions, and call instructions, // so there shouldn't need to be a null check. 
@@ -5686,33 +5686,33 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { continue; DxilInst_OutputComplete OutputComplete(CI); - Value *completedRecord = OutputComplete.get_output(); + Value *CompletedRecord = OutputComplete.get_output(); - auto vIt = handleToCI.find(completedRecord); - if (vIt == handleToCI.end()) { + auto vIt = HandleToCI.find(CompletedRecord); + if (vIt == HandleToCI.end()) { llvm::SmallPtrSet s; s.insert(CI); - handleToCI.insert(std::make_pair(completedRecord, s)); + HandleToCI.insert(std::make_pair(CompletedRecord, s)); } else { // if the handle is already in the map, make sure the map's set of // output complete calls that dominate the handle and do not dominate // each other gets updated if necessary bool CI_is_dominated = false; - for (auto ocIt = vIt->second.begin(); ocIt != vIt->second.end();) { + for (auto OcIt = vIt->second.begin(); OcIt != vIt->second.end();) { // if our new OC CI dominates an OC instruction in the set, // then replace the instruction in the set with the new OC CI. - if (DT.dominates(CI, *ocIt)) { - auto cur_it = ocIt++; + if (DT.dominates(CI, *OcIt)) { + auto cur_it = OcIt++; vIt->second.erase(*cur_it); continue; } // Remember if our new CI gets dominated by any CI in the set. - if (DT.dominates(*ocIt, CI)) { + if (DT.dominates(*OcIt, CI)) { CI_is_dominated = true; break; } - ocIt++; + OcIt++; } // if no CI in the set dominates our new CI, // the new CI should be added to the set @@ -5721,14 +5721,14 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { } } - for (auto handle_iter = handleToCI.begin(), e = handleToCI.end(); + for (auto handle_iter = HandleToCI.begin(), e = HandleToCI.end(); handle_iter != e; handle_iter++) { for (auto user_itr = handle_iter->first->user_begin(); user_itr != handle_iter->first->user_end(); user_itr++) { User *pU = *user_itr; - Instruction *useInstr = cast(pU); - if (useInstr) { - if (CallInst *CI = dyn_cast(useInstr)) { + Instruction *UseInstr = cast(pU); + if (UseInstr) { + if (CallInst *CI = dyn_cast(UseInstr)) { // if the user is an output complete call that is in the set of // OutputComplete calls not dominated by another OutputComplete // call for the same handle value, no diagnostics need to be @@ -5739,15 +5739,15 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { // make sure any output complete call in the set // that dominates this use gets its diagnostic emitted. 
- for (auto ocIt = handle_iter->second.begin(); - ocIt != handle_iter->second.end(); ocIt++) { - Instruction *ocInstr = cast(*ocIt); - if (DT.dominates(ocInstr, useInstr)) { + for (auto OcIt = handle_iter->second.begin(); + OcIt != handle_iter->second.end(); OcIt++) { + Instruction *OcInstr = cast(*OcIt); + if (DT.dominates(OcInstr, UseInstr)) { ValCtx.EmitInstrError( - useInstr, + UseInstr, ValidationRule::InstrNodeRecordHandleUseAfterComplete); ValCtx.EmitInstrNote( - *ocIt, "record handle invalidated by OutputComplete"); + *OcIt, "record handle invalidated by OutputComplete"); break; } } @@ -5763,57 +5763,57 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { static void ValidateUninitializedOutput(ValidationContext &ValCtx, Function *F) { DxilModule &DM = ValCtx.DxilMod; - DxilEntryProps &entryProps = DM.GetDxilEntryProps(F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); EntryStatus &Status = ValCtx.GetEntryStatus(F); - const DxilFunctionProps &props = entryProps.props; + const DxilFunctionProps &Props = EntryProps.props; // For HS only need to check Tessfactor which is in patch constant sig. - if (props.IsHS()) { - std::vector &patchConstOrPrimCols = Status.patchConstOrPrimCols; - const DxilSignature &patchConstSig = - entryProps.sig.PatchConstOrPrimSignature; - for (auto &E : patchConstSig.GetElements()) { - unsigned mask = patchConstOrPrimCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + if (Props.IsHS()) { + std::vector &PatchConstOrPrimCols = Status.patchConstOrPrimCols; + const DxilSignature &PatchConstSig = + EntryProps.sig.PatchConstOrPrimSignature; + for (auto &E : PatchConstSig.GetElements()) { + unsigned Mask = PatchConstOrPrimCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. - if (mask != requireMask && !E->GetSemantic()->IsArbitrary()) { + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary()) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } return; } - const DxilSignature &outSig = entryProps.sig.OutputSignature; - std::vector &outputCols = Status.outputCols; - for (auto &E : outSig.GetElements()) { - unsigned mask = outputCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + std::vector &OutputCols = Status.outputCols; + for (auto &E : OutSig.GetElements()) { + unsigned Mask = OutputCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. 
-    if (mask != requireMask && !E->GetSemantic()->IsArbitrary() &&
+    if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary() &&
         E->GetSemantic()->GetKind() != Semantic::Kind::Target) {
       ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput,
                                {E->GetName()});
     }
   }

-  if (!props.IsGS()) {
-    unsigned posMask = Status.OutputPositionMask[0];
-    if (posMask != 0xf && Status.hasOutputPosition[0]) {
+  if (!Props.IsGS()) {
+    unsigned PosMask = Status.OutputPositionMask[0];
+    if (PosMask != 0xf && Status.hasOutputPosition[0]) {
       ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition);
     }
   } else {
-    const auto &GS = props.ShaderProps.GS;
-    unsigned streamMask = 0;
-    for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) {
-      if (GS.streamPrimitiveTopologies[i] !=
+    const auto &GS = Props.ShaderProps.GS;
+    unsigned StreamMask = 0;
+    for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) {
+      if (GS.streamPrimitiveTopologies[I] !=
           DXIL::PrimitiveTopology::Undefined) {
-        streamMask |= 1 << i;
+        StreamMask |= 1 << I;
       }
     }
-    for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) {
-      if (streamMask & (1 << i)) {
-        unsigned posMask = Status.OutputPositionMask[i];
-        if (posMask != 0xf && Status.hasOutputPosition[i]) {
+    for (unsigned I = 0; I < DXIL::kNumOutputStreams; I++) {
+      if (StreamMask & (1 << I)) {
+        unsigned PosMask = Status.OutputPositionMask[I];
+        if (PosMask != 0xf && Status.hasOutputPosition[I]) {
           ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition);
         }
       }

From 0ffd60accba540b0127e727f68b61b8075d6130a Mon Sep 17 00:00:00 2001
From: Greg Roth
Date: Fri, 4 Apr 2025 13:10:28 -0700
Subject: [PATCH 71/88] [SM6.9] Native vector load/store lowering (#7292)

Enables declaring long vector types for raw buffers and lowering loads and
stores of those and of traditional vectors to new DXIL ops that keep the
native vector types, along with validation and testing support for the same.

Allow declaring long vector raw buffer resources. Previously disallowed
along with other global types, this provides a mechanism for indicating
which buffers are raw and allowing them to contain long vectors, while
continuing to produce an error for other resource types, as verified by
existing tests.

Introduce native vector DXIL load/store intrinsics. Add new raw buffer
vector load/store intrinsics using the new vector overload types. Include
them in the validation associated with similar loads/stores.

Lower native vector raw buffer loads/stores into the new ops. When the
loaded or stored type is a vector with more than one element, the shader
model is 6.9 or higher, and the operation is on a raw buffer, generate a
native vector raw buffer load or store. Incidentally removes an unused
parameter in load translation and refactors the lowering to flow better
with the new resret types.

Add validation and compute shader tests.

Add a vector-to-scalar raw buffer load/store lowering pass. Native vector
loads and stores are generated for 6.9 targets and above, which includes
the 6.x target used when compiling to libraries. This adds a pass, run when
linking, that lowers the vector operations to scalar operations for shader
models without native vector support, so libraries compiled for shader
models that support native vectors can be linked into targets that don't.

Validate that native vector loads and stores have properly defined
parameters of the correct types. Add tests for both the vector loads/stores
and the original scalar loads/stores since they share a lot of validation
code.
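For illustration, a minimal sketch of the kind of HLSL this change targets.
The buffer name, binding, element type, vector length, and byte offsets here
are hypothetical, chosen only to mirror the patterns exercised by the new
tests below:

    // On SM 6.9+, the load and store below keep the whole <8 x float> value
    // and lower to the new dx.op.rawBufferVectorLoad/rawBufferVectorStore
    // ops; when a library is linked for a target without native vector
    // support, the new pass breaks such ops back down into scalar
    // rawBufferLoad/rawBufferStore calls in chunks of up to four components.
    RWByteAddressBuffer Buf : register(u0);

    [shader("compute")]
    [numthreads(1, 1, 1)]
    void main(uint tid : SV_GroupIndex) {
      // 8 floats = 32 bytes per element, so index by byte offset tid * 32.
      vector<float, 8> v = Buf.Load<vector<float, 8> >(tid * 32);
      Buf.Store<vector<float, 8> >(tid * 32, v * 2.0f);
    }
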
Fixes #7118 --- include/dxc/DXIL/DxilConstants.h | 28 +- include/dxc/DXIL/DxilInstructions.h | 93 +++ include/dxc/HLSL/DxilGenerationPass.h | 2 + lib/DXIL/DxilOperations.cpp | 46 +- lib/DxilValidation/DxilValidation.cpp | 87 ++- lib/HLSL/CMakeLists.txt | 1 + lib/HLSL/DxilLinker.cpp | 4 + lib/HLSL/DxilScalarizeVectorLoadStores.cpp | 231 ++++++ lib/HLSL/HLOperationLower.cpp | 72 +- tools/clang/lib/Sema/SemaHLSL.cpp | 2 +- .../intrinsics/buffer-load-stores-sm69.hlsl | 91 +++ .../hlsl/types/longvec-operators-cs.hlsl | 719 ++++++++++++++++++ .../types/longvec-operators-vec1s-cs.hlsl | 680 +++++++++++++++++ .../hlsl/types/longvec-operators-vec1s.hlsl | 62 +- .../hlsl/types/longvec-operators.hlsl | 18 - .../longvec-load-stores-scalarizevecldst.ll | 478 ++++++++++++ .../DXILValidation/load-store-validation.hlsl | 74 ++ .../DXILValidation/vector-validation.hlsl | 14 + .../load-store-validation.ll | 229 ++++++ .../LitDXILValidation/vector-validation.ll | 78 ++ .../hlsl/types/invalid-longvecs-sm68.hlsl | 2 + tools/clang/unittests/HLSL/ValidationTest.cpp | 26 +- utils/hct/hctdb.py | 96 ++- 23 files changed, 2991 insertions(+), 142 deletions(-) create mode 100644 lib/HLSL/DxilScalarizeVectorLoadStores.cpp create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll create mode 100644 tools/clang/test/DXILValidation/load-store-validation.hlsl create mode 100644 tools/clang/test/DXILValidation/vector-validation.hlsl create mode 100644 tools/clang/test/LitDXILValidation/load-store-validation.ll create mode 100644 tools/clang/test/LitDXILValidation/vector-validation.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 447728300b..4f8c521851 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -898,8 +898,11 @@ enum class OpCode : unsigned { GetDimensions = 72, // gets texture size information RawBufferLoad = 139, // reads from a raw buffer and structured buffer RawBufferStore = 140, // writes to a RWByteAddressBuffer or RWStructuredBuffer - TextureLoad = 66, // reads texel data without any filtering or sampling - TextureStore = 67, // reads texel data without any filtering or sampling + RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer + RawBufferVectorStore = + 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer + TextureLoad = 66, // reads texel data without any filtering or sampling + TextureStore = 67, // reads texel data without any filtering or sampling TextureStoreSample = 225, // stores texel data at specified sample index // Sampler Feedback @@ -1044,7 +1047,7 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes = 303 // exclusive last value of enumeration + NumOpCodes = 305 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1278,6 +1281,8 @@ enum class OpCodeClass : unsigned { GetDimensions, RawBufferLoad, RawBufferStore, + RawBufferVectorLoad, + RawBufferVectorStore, TextureLoad, TextureStore, TextureStoreSample, @@ -1356,7 +1361,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 177 // exclusive last value of enumeration + NumOpClasses = 179 // exclusive last value 
of enumeration }; // OPCODECLASS-ENUM:END @@ -1415,6 +1420,12 @@ const unsigned kRawBufferLoadElementOffsetOpIdx = 3; const unsigned kRawBufferLoadMaskOpIdx = 4; const unsigned kRawBufferLoadAlignmentOpIdx = 5; +// RawBufferVectorLoad. +const unsigned kRawBufferVectorLoadHandleOpIdx = 1; +const unsigned kRawBufferVectorLoadIndexOpIdx = 2; +const unsigned kRawBufferVectorLoadElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorLoadAlignmentOpIdx = 4; + // RawBufferStore const unsigned kRawBufferStoreHandleOpIdx = 1; const unsigned kRawBufferStoreIndexOpIdx = 2; @@ -1424,7 +1435,14 @@ const unsigned kRawBufferStoreVal1OpIdx = 5; const unsigned kRawBufferStoreVal2OpIdx = 6; const unsigned kRawBufferStoreVal3OpIdx = 7; const unsigned kRawBufferStoreMaskOpIdx = 8; -const unsigned kRawBufferStoreAlignmentOpIdx = 8; +const unsigned kRawBufferStoreAlignmentOpIdx = 9; + +// RawBufferVectorStore +const unsigned kRawBufferVectorStoreHandleOpIdx = 1; +const unsigned kRawBufferVectorStoreIndexOpIdx = 2; +const unsigned kRawBufferVectorStoreElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorStoreValOpIdx = 4; +const unsigned kRawBufferVectorStoreAlignmentOpIdx = 5; // TextureStore. const unsigned kTextureStoreHandleOpIdx = 1; diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index f8d9ae77f3..6ee22869a5 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8923,5 +8923,98 @@ struct DxilInst_HitObject_MakeNop { // Metadata bool requiresUniformInputs() const { return false; } }; + +/// This instruction reads from a raw buffer and structured buffer +struct DxilInst_RawBufferVectorLoad { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorLoad(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorLoad); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_buf = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_alignment = 4, + }; + // Accessors + llvm::Value *get_buf() const { return Instr->getOperand(1); } + void set_buf(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(4); } + void set_alignment(llvm::Value *val) { Instr->setOperand(4, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(4)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(4, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer +struct DxilInst_RawBufferVectorStore { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorStore(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, 
hlsl::OP::OpCode::RawBufferVectorStore); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_uav = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_value0 = 4, + arg_alignment = 5, + }; + // Accessors + llvm::Value *get_uav() const { return Instr->getOperand(1); } + void set_uav(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_value0() const { return Instr->getOperand(4); } + void set_value0(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(5); } + void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/HLSL/DxilGenerationPass.h b/include/dxc/HLSL/DxilGenerationPass.h index c77ddab3d0..9df93e9232 100644 --- a/include/dxc/HLSL/DxilGenerationPass.h +++ b/include/dxc/HLSL/DxilGenerationPass.h @@ -81,6 +81,7 @@ ModulePass *createResumePassesPass(); FunctionPass *createMatrixBitcastLowerPass(); ModulePass *createDxilCleanupAddrSpaceCastPass(); ModulePass *createDxilRenameResourcesPass(); +ModulePass *createDxilScalarizeVectorLoadStoresPass(); void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry &); void initializeDxilAllocateResourcesForLibPass(llvm::PassRegistry &); @@ -115,6 +116,7 @@ void initializeResumePassesPass(llvm::PassRegistry &); void initializeMatrixBitcastLowerPassPass(llvm::PassRegistry &); void initializeDxilCleanupAddrSpaceCastPass(llvm::PassRegistry &); void initializeDxilRenameResourcesPass(llvm::PassRegistry &); +void initializeDxilScalarizeVectorLoadStoresPass(llvm::PassRegistry &); ModulePass *createDxilValidateWaveSensitivityPass(); void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry &); diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 56cdd0d04f..0b4c7218d4 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2633,6 +2633,24 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v + + // Resources + {OC::RawBufferVectorLoad, + "RawBufferVectorLoad", + OCC::RawBufferVectorLoad, + "rawBufferVectorLoad", + Attribute::ReadOnly, + 1, + {{0x4e7}}, + {{0xe7}}}, // Overloads: hfwidlgetNumParams() <= 4) return nullptr; return FT->getParamType(4); @@ -6134,7 +6173,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TextureGatherRaw: case OpCode::SampleCmpLevel: case OpCode::SampleCmpGrad: - case OpCode::SampleCmpBias: { + case OpCode::SampleCmpBias: + case OpCode::RawBufferVectorLoad: { StructType *ST = cast(Ty); return ST->getElementType(0); } diff --git a/lib/DxilValidation/DxilValidation.cpp 
b/lib/DxilValidation/DxilValidation.cpp index 97bde6ca24..a788f21d4e 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1475,34 +1475,35 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, } } } break; - case DXIL::OpCode::RawBufferLoad: { + case DXIL::OpCode::RawBufferLoad: if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferLoad, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferLoad BufLd(CI); + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorLoad: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadHandleOpIdx); DXIL::ComponentType CompTy; DXIL::ResourceClass ResClass; DXIL::ResourceKind ResKind = - GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); if (ResClass != DXIL::ResourceClass::SRV && - ResClass != DXIL::ResourceClass::UAV) { + ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); - } - Value *Offset = BufLd.get_elementOffset(); - Value *Align = BufLd.get_alignment(); - unsigned AlignSize = 0; - if (!isa(Align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - AlignSize = BufLd.get_alignment_val(); - } + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferLoadAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorLoad == Opcode) + AlignIdx = DXIL::OperandIndex::kRawBufferVectorLoadAlignmentOpIdx; + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx); switch (ResKind) { case DXIL::ResourceKind::RawBuffer: if (!isa(Offset)) { @@ -1526,38 +1527,44 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferStore, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferStore BufSt(CI); - DXIL::ComponentType CompTy; - DXIL::ResourceClass ResClass; - DXIL::ResourceKind ResKind = - GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - - if (ResClass != DXIL::ResourceClass::UAV) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); - } - - ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + DxilInst_RawBufferStore bufSt(CI); + ConstantInt *Mask = dyn_cast(bufSt.get_mask()); unsigned StValMask = - StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), - BufSt.get_value2(), BufSt.get_value3()}); + StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), + bufSt.get_value2(), bufSt.get_value3()}); if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; + } + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorStore: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); - Value *Offset = 
BufSt.get_elementOffset(); - Value *Align = BufSt.get_alignment(); - unsigned AlignSize = 0; - if (!isa(Align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - AlignSize = BufSt.get_alignment_val(); + if (ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); + + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferStoreAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorStore == Opcode) { + AlignIdx = DXIL::OperandIndex::kRawBufferVectorStoreAlignmentOpIdx; + unsigned ValueIx = DXIL::OperandIndex::kRawBufferVectorStoreValOpIdx; + if (isa(CI->getOperand(ValueIx))) + ValCtx.EmitInstrError(CI, + ValidationRule::InstrUndefinedValueForUAVStore); } + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx); switch (ResKind) { case DXIL::ResourceKind::RawBuffer: if (!isa(Offset)) { @@ -1684,6 +1691,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: + case DXIL::OpCode::RawBufferVectorLoad: + case DXIL::OpCode::RawBufferVectorStore: ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. diff --git a/lib/HLSL/CMakeLists.txt b/lib/HLSL/CMakeLists.txt index 947fc4c14f..21bb9523a7 100644 --- a/lib/HLSL/CMakeLists.txt +++ b/lib/HLSL/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_library(LLVMHLSL DxilNoops.cpp DxilPreserveAllOutputs.cpp DxilRenameResourcesPass.cpp + DxilScalarizeVectorLoadStores.cpp DxilSimpleGVNHoist.cpp DxilSignatureValidation.cpp DxilTargetLowering.cpp diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index ca343662ab..75d1bf78e9 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1247,6 +1247,10 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createDxilReinsertNopsPass()); PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false)); + // If we need SROA and dynamicindexvector to array, + // do it early to allow following scalarization to go forward. + PM.add(createDxilScalarizeVectorLoadStoresPass()); + // Remove unused functions. PM.add(createDxilDeadFunctionEliminationPass()); diff --git a/lib/HLSL/DxilScalarizeVectorLoadStores.cpp b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp new file mode 100644 index 0000000000..febcf32358 --- /dev/null +++ b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilScalarizeVectorLoadStores.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Lowers native vector load stores to potentially multiple scalar calls. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DXIL/DxilModule.h" +#include "dxc/HLSL/DxilGenerationPass.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +using namespace llvm; +using namespace hlsl; + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); + +class DxilScalarizeVectorLoadStores : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {} + + StringRef getPassName() const override { + return "DXIL scalarize vector load/stores"; + } + + bool runOnModule(Module &M) override { + DxilModule &DM = M.GetOrCreateDxilModule(); + // Shader Model 6.9 allows native vectors and doesn't need this pass. + if (DM.GetShaderModel()->IsSM69Plus()) + return false; + + bool Changed = false; + + hlsl::OP *HlslOP = DM.GetOP(); + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorLoad)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorStore)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorStore(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + return Changed; + } +}; + +static unsigned GetRawBufferMask(unsigned NumComponents) { + switch (NumComponents) { + case 0: + return 0; + case 1: + return DXIL::kCompMask_X; + case 2: + return DXIL::kCompMask_X | DXIL::kCompMask_Y; + case 3: + return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; + case 4: + default: + return DXIL::kCompMask_All; + } + return DXIL::kCompMask_All; +} + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorLoad VecLd(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferLoad; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecLd.get_buf()); // Resource handle @1. + Args.emplace_back(VecLd.get_index()); // Index @2. + Args.emplace_back(VecLd.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Mask to be set later @4. + Args.emplace_back(VecLd.get_alignment()); // Alignment @5. + + // Set offset to increment depending on whether the real offset is defined. + unsigned OffsetIdx; + if (isa(VecLd.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. 
+ OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + StructType *ResRetTy = cast(CI->getType()); + Type *Ty = ResRetTy->getElementType(0); + unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + + const unsigned MaxElemCount = 4; + SmallVector Elts(NumComponents); + Value *Ld = nullptr; + for (unsigned EIx = 0; EIx < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + HlslOP->GetI8Const(GetRawBufferMask(ChunkSize)); + // If we've loaded a chunk already, update offset to next chunk. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(OpCode)); + for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++) + Elts[EIx] = Builder.CreateExtractValue(Ld, ChIx); + } + + Value *RetValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++) + RetValNew = Builder.CreateInsertElement(RetValNew, Elts[ElIx], ElIx); + + // Replace users of the vector extracted from the vector load resret. + Value *Status = nullptr; + for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) { + auto EV = cast(*(CU++)); + unsigned Ix = EV->getIndices()[0]; + if (Ix == 0) { + // Handle value uses. + EV->replaceAllUsesWith(RetValNew); + } else if (Ix == 1) { + // Handle status uses. + if (!Status) + Status = Builder.CreateExtractValue(Ld, DXIL::kResRetStatusIndex); + EV->replaceAllUsesWith(Status); + } + EV->eraseFromParent(); + } + CI->eraseFromParent(); +} + +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorStore VecSt(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferStore; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecSt.get_uav()); // Resource handle @1. + Args.emplace_back(VecSt.get_index()); // Index @2. + Args.emplace_back(VecSt.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Val0 to be set later @4. + Args.emplace_back(nullptr); // Val1 to be set later @5. + Args.emplace_back(nullptr); // Val2 to be set later @6. + Args.emplace_back(nullptr); // Val3 to be set later @7. + Args.emplace_back(nullptr); // Mask to be set later @8. + Args.emplace_back(VecSt.get_alignment()); // Alignment @9. + + // Set offset to increment depending on whether the real offset is defined. + unsigned OffsetIdx; + if (isa(VecSt.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. 
+ OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + Value *VecVal = VecSt.get_value0(); + + const unsigned MaxElemCount = 4; + Type *Ty = VecVal->getType(); + const unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + Value *UndefVal = UndefValue::get(EltTy); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + for (unsigned EIx = 0; EIx < NumComponents;) { + // Store 4 elements or however many less than 4 are left to store. + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + // Populate all value arguments either with the vector or undefs. + uint8_t Mask = 0; + unsigned ChIx = 0; + for (; ChIx < ChunkSize; ChIx++, EIx++) { + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = + Builder.CreateExtractElement(VecVal, EIx); + Mask |= (1 << ChIx); + } + for (; ChIx < MaxElemCount; ChIx++) + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = UndefVal; + + Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = + HlslOP->GetU8Const(Mask); + Builder.CreateCall(F, Args); + } + CI->eraseFromParent(); +} + +char DxilScalarizeVectorLoadStores::ID = 0; + +ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() { + return new DxilScalarizeVectorLoadStores(); +} + +INITIALIZE_PASS(DxilScalarizeVectorLoadStores, + "hlsl-dxil-scalarize-vector-load-stores", + "DXIL scalarize vector load/stores", false, false) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 445dbcc879..4d8201df8d 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -3956,6 +3956,11 @@ struct ResLoadHelper { : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { opcode = LoadOpFromResKind(RK); + Type *Ty = Inst->getType(); + if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && + Ty->getVectorNumElements() > 1 && + Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; @@ -4025,6 +4030,14 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, if (RC == DxilResourceBase::Class::SRV) OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx : HLOperandIndex::kTexLoadOffsetOpIdx; + } else if (opcode == OP::OpCode::RawBufferLoad) { + // If native vectors are available and this load had a vector + // with more than one elements, convert the RawBufferLod to the + // native vector variant RawBufferVectorLoad. + Type *Ty = CI->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; } // Set offset. @@ -4082,7 +4095,7 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, // Sets up arguments for buffer load call. 
static SmallVector GetBufLoadArgs(ResLoadHelper helper, HLResource::Kind RK, - IRBuilder<> Builder, Type *EltTy, + IRBuilder<> Builder, unsigned LdSize) { OP::OpCode opcode = helper.opcode; llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); @@ -4130,6 +4143,7 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, // If not TextureLoad, it could be a typed or raw buffer load. // They have mostly similar arguments. DXASSERT(opcode == OP::OpCode::RawBufferLoad || + opcode == OP::OpCode::RawBufferVectorLoad || opcode == OP::OpCode::BufferLoad, "Wrong opcode in get load args"); Args.emplace_back( @@ -4140,6 +4154,9 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, // Unlike typed buffer load, raw buffer load has mask and alignment. Args.emplace_back(nullptr); // Mask will be added later %4. Args.emplace_back(alignmentVal); // alignment @5. + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // RawBufferVectorLoad takes just alignment, no mask. + Args.emplace_back(alignmentVal); // alignment @4 } } return Args; @@ -4165,18 +4182,21 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, if (isBool || (is64 && isTyped)) EltTy = Builder.getInt32Ty(); - // 64-bit types are stored as int32 pairs in typed buffers. + // Calculate load size with the scalar memory element type. + unsigned LdSize = DL.getTypeAllocSize(EltTy); + + // Adjust number of components as needed. if (is64 && isTyped) { + // 64-bit types are stored as int32 pairs in typed buffers. DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); NumComponents *= 2; + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // Native vector loads only have a single vector element in ResRet. + EltTy = VectorType::get(EltTy, NumComponents); + NumComponents = 1; } - unsigned LdSize = DL.getTypeAllocSize(EltTy); - - SmallVector Elts(NumComponents); - - SmallVector Args = - GetBufLoadArgs(helper, RK, Builder, EltTy, LdSize); + SmallVector Args = GetBufLoadArgs(helper, RK, Builder, LdSize); // Keep track of the first load for debug info migration. Value *FirstLd = nullptr; @@ -4188,9 +4208,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, else if (RK == DxilResource::Kind::StructuredBuffer) OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; - // Create calls to function object. + // Create call(s) to function object and collect results in Elts. // Typed buffer loads are limited to one load of up to 4 32-bit values. // Raw buffer loads might need multiple loads in chunks of 4. + SmallVector Elts(NumComponents); for (unsigned i = 0; i < NumComponents;) { // Load 4 elements or however many less than 4 are left to load. unsigned chunkSize = std::min(NumComponents - i, 4U); @@ -4200,7 +4221,7 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = GetRawBufferMaskForETy(EltTy, chunkSize, OP); // If we've loaded a chunk already, update offset to next chunk. - if (FirstLd != nullptr && opcode == OP::OpCode::RawBufferLoad) + if (FirstLd != nullptr) Args[OffsetIdx] = Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); } @@ -4209,8 +4230,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); // Extract elements from returned ResRet. - for (unsigned j = 0; j < chunkSize; j++, i++) - Elts[i] = Builder.CreateExtractValue(Ld, j); + // Native vector loads just have one vector element in the ResRet. 
+ // Others have up to four scalars that need to be individually extracted. + if (opcode == OP::OpCode::RawBufferVectorLoad) + Elts[i++] = Builder.CreateExtractValue(Ld, 0); + else + for (unsigned j = 0; j < chunkSize; j++, i++) + Elts[i] = Builder.CreateExtractValue(Ld, j); // Update status. UpdateStatus(Ld, helper.status, Builder, OP); @@ -4248,9 +4274,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, } } - // Package elements into a vector. + // Package elements into a vector as needed. Value *retValNew = nullptr; - if (!Ty->isVectorTy()) { + // Scalar or native vector loads need not construct vectors from elements. + if (!Ty->isVectorTy() || opcode == OP::OpCode::RawBufferVectorLoad) { retValNew = Elts[0]; } else { retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); @@ -4348,6 +4375,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, case DxilResource::Kind::StructuredBuffer: IsTyped = false; opcode = OP::OpCode::RawBufferStore; + // Where shader model and type allows, use vector store intrinsic. + if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + Ty->isVectorTy() && Ty->getVectorNumElements() > 1) + opcode = OP::OpCode::RawBufferVectorStore; break; case DxilResource::Kind::TypedBuffer: opcode = OP::OpCode::BufferStore; @@ -4390,7 +4421,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, EltTy = i32Ty; } - Function *F = OP->GetOpFunc(opcode, EltTy); llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); llvm::Value *undefI = @@ -4404,6 +4434,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, unsigned OffsetIdx = 0; if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::RawBufferVectorStore || opcode == OP::OpCode::BufferStore) { // Append Coord0 (Index) value. if (Idx->getType()->isVectorTy()) { @@ -4423,7 +4454,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, OffsetIdx = storeArgs.size() - 1; // Coord1 (Offset). - // Only relevant when storing more than 4 elements to structured buffers. storeArgs.emplace_back(offset); } else { // texture store @@ -4444,6 +4474,16 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, // TODO: support mip for texture ST } + // RawBufferVectorStore only takes a single value and alignment arguments. + if (opcode == DXIL::OpCode::RawBufferVectorStore) { + storeArgs.emplace_back(val); + storeArgs.emplace_back(Alignment); + Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); + Builder.CreateCall(F, storeArgs); + return; + } + Function *F = OP->GetOpFunc(opcode, EltTy); + constexpr unsigned MaxStoreElemCount = 4; const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements() : 1; const unsigned StoreInstCount = diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index f9e011f8d4..027d7d3cbc 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -15193,7 +15193,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from $Global cbuffers. - if (isGlobal && !isStatic && !isGroupShared) { + if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { // Suppress actual emitting of errors for incompletable types here // They are redundant to those produced in ActOnUninitializedDecl. 
struct SilentDiagnoser : public TypeDiagnoser { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl new file mode 100644 index 0000000000..5305ee495b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -0,0 +1,91 @@ +// RUN: %dxc -DTYPE=float -DNUM=4 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=4 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=2 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=2 -T vs_6_9 %s | FileCheck %s + +// RUN: %dxc -DTYPE=float -DNUM=6 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=13 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=24 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=32 -T vs_6_9 %s | FileCheck %s + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar/vector buffer types and indices. +/////////////////////////////////////////////////////////////////////// + +// CHECK: %dx.types.ResRet.[[VTY:v[0-9]*[a-z][0-9][0-9]]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]>, i32 } + +ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + +StructuredBuffer > RoStBuf : register(t2); +RWStructuredBuffer > RwStBuf : register(u2); + +ConsumeStructuredBuffer > CnStBuf : register(u4); +AppendStructuredBuffer > ApStBuf : register(u5); + +// CHECK-LABEL: define void @main +[shader("vertex")] +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt1 = RwByBuf.Load< vector >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt2 = RoByBuf.Load< vector >(ix[0]); + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: 
all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt4 = RoStBuf[ix[1]]; + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl new file mode 100644 index 0000000000..0a115bd709 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl @@ -0,0 +1,719 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -DNUM=17 
-enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DNUM=33 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DNUM=12 -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on an assortment vector sizes and types with 6.9 native vectors. +// Tests in a CS environment where vector operations were previously disallowed to confirm that they are retained. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK-DAG: %dx.types.ResRet.[[TY:v[0-9]*[a-z][0-9]*]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]> +// CHECK-DAG: %dx.types.ResRet.[[STY:[a-z][0-9]*]] = type { [[STYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> + +void assignments(inout vector things[11], TYPE scales[10]); +vector arithmetic(inout vector things[11])[11]; +vector scarithmetic(vector things[11], TYPE scales[10])[11]; +vector logic(vector truth[10], vector consequences[11])[10]; +vector index(vector things[11], int i)[11]; +void bittwiddlers(inout vector things[13]); + +struct Viface { + vector values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + vector values[10]; +}; + +struct Binface { + vector values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + // CHECK: [[scratch1:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[scratch2:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + // Assign scalar offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 2, i32 0, <13 x i32> + Offsets[2] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+void assignments(inout vector things[11], TYPE scales[10]) { + + // CHECK: [[VcIx:%.*]] = add i32 [[InIx1]], 1 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + things[0] = scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec1]] + things[1] += things[5]; + + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + things[2] -= things[6]; + + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec7]], [[vec3]] + things[3] *= things[7]; + + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + things[4] /= things[8]; + +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = (vector)things[9]; + vector f5 = (vector)things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + things[6] += scales[1]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + things[7] -= scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + things[8] *= scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + +} + +// Test arithmetic operators. +vector arithmetic(inout vector things[11])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // NOINT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> <[[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000),.*}}>, [[vec0]] + // INT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> zeroinitializer, [[vec0]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] = things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]], <[[TYPE]] [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]], <[[TYPE]] [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]], <[[TYPE]] [[NEG1]] + res[10] = --things[10]; + + // Things[] input gets all the result values since pre/post inc/decrements don't change the end result. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // res1 is just vec0 since it was just the unary + operator. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // res[] input gets either the original or the preincremented value. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[vec7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[vec8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
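+// When a TYPE scalar meets a vector<TYPE, NUM> operand, the scalar is splatted to the full
+// vector width before the binary op is emitted. A minimal sketch of the pattern the checks
+// below match, assuming a 7-lane float vector (names are illustrative, not captured values):
+//   %spt   = insertelement <7 x float> undef, float %scl, i32 0
+//   %splat = shufflevector <7 x float> %spt, <7 x float> undef, <7 x i32> zeroinitializer
+//   %res   = fadd fast <7 x float> %splat, %vec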
+vector scarithmetic(vector things[11], TYPE scales[10])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] 
@dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x 
[[TYPE]]> [[res3]], i32 [[ALN]])
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]])
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]])
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]])
+
+  return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021.
+vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[11])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4
+  // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]]
+  // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i32 [[IALN]])
+  // CHECK: [[ivec0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i32 [[IALN]])
+  // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i32 [[IALN]])
+  // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i32 [[IALN]])
+  // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i32 [[IALN]])
+  // CHECK: [[ivec4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i32 [[IALN]])
+  // CHECK: [[ivec5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+
+  // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 4
+  // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]]
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]])
+  // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]])
+  // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]])
+  // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]])
+  // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call
%dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec1]], zeroinitializer + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec4]], zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec5]], zeroinitializer + // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = truth[3] ? 
truth[4] : truth[5];
+
+  // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]]
+  // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32>
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]]
+  // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32>
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32>
+  res[6] = consequences[2] < consequences[3];
+
+  // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]]
+  // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32>
+  res[7] = consequences[3] > consequences[4];
+
+  // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32>
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32>
+  res[9] = consequences[5] >= consequences[6];
+
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], <[[NUM]] x i32> [[res0]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], <[[NUM]] x i32> [[res1]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], <[[NUM]] x i32> [[res2]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], <[[NUM]] x i32> [[res3]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], <[[NUM]] x i32> [[res4]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], <[[NUM]] x i32> [[res5]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], <[[NUM]] x i32> [[res6]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], <[[NUM]] x i32> [[res7]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], <[[NUM]] x i32> [[res8]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], <[[NUM]] x i32> [[res9]], i32 4)
+
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators.
+vector<TYPE, NUM> index(vector<TYPE, NUM> things[11], int i)[11] {
+  vector<TYPE, NUM> res[11];
+
+  // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5
+  // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]]
+  // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 5
+  // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]]
+
+  // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 0
+  // CHECK:
[[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec1]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec3]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec4]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec5]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec6]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[vec7]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec9]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec10]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + res[0] = 0; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 [[Ix]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[POS1]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 [[Ix]] + // CHECK: [[ldix:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[ldix]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[4] = things[i]; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, <[[NUM]] x 
[[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[ldix]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[vec2]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
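+// The `and` preceding each shl/lshr/ashr matched below is the shift-count mask dxc emits so
+// the amount stays below the element bit width. A scalar sketch of the same lowering, assuming
+// 32-bit int elements (the mask would be 63 for 64-bit and 15 for 16-bit element types):
+//   int masked  = amount & 31;
+//   int shifted = value << masked;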
+void bittwiddlers(inout vector things[13]) { + // INT: [[VcIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // INT: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // INT: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // INT: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // INT: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // INT: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // INT: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // INT: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // INT: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // INT: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], i32 [[ALN]]) + // INT: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], i32 [[ALN]]) + // INT: [[vec11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], i32 [[ALN]]) + // INT: [[vec12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]] + // INT: 
[[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + things[7] &= things[9]; + + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], <[[NUM]] x [[TYPE]]> [[vec11]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], <[[NUM]] x [[TYPE]]> [[vec12]], i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl new file mode 100644 index 0000000000..ca239a5b22 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl @@ -0,0 +1,680 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Scalar variants to confirm they match. +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on vec1s in a 6.9 compute shader to ensure they continue to be treated as scalars. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
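+// For instance, with -DTYPE=float16_t the module declares something like
+//   %dx.types.ResRet.f16 = type { half, half, half, half, i32 }
+// so the first CHECK-DAG below binds [[TY]] to the overload suffix ("f16") and [[TYPE]] to the
+// IR element spelling ("half"), letting later lines refer to either form. (Illustrative values;
+// the exact ResRet member list is whatever dxc emits for the chosen TYPE.)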
+// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:i32]] = type { i32 + +#ifdef SCL +#define VTYPE TYPE +#else +#define VTYPE vector +#endif + +void assignments(inout VTYPE things[11], TYPE scales[10]); +VTYPE arithmetic(inout VTYPE things[11])[11]; +VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11]; +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10]; +VTYPE index(VTYPE things[11], int i)[11]; +void bittwiddlers(inout VTYPE things[13]); + +struct Viface { + VTYPE values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + bool1 values[10]; +}; + +struct Binface { + VTYPE values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE)*11, + sizeof(TYPE)*12); + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(int)*0, + sizeof(int)*1, + sizeof(int)*2, + sizeof(int)*3, + sizeof(int)*4, + sizeof(int)*5, + sizeof(int)*6, + sizeof(int)*7, + sizeof(int)*8, + sizeof(int)*9, + sizeof(int)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. 
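+  // As a worked instance (assuming TYPE=float, so sizeof(TYPE) == 4), the first store above
+  // writes <13 x i32> <i32 0, i32 4, i32 8, ..., i32 48>, i.e. the byte offset of element k in
+  // a tightly packed TYPE array; these constants are presumably where the [[OFF*]]-style
+  // captures used by the load/store checks get their values.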
+ + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +void assignments(inout VTYPE things[11], TYPE scales[10]) { + + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 1 + + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue 
%dx.types.ResRet.[[TY]] [[ld]], 0 + // Nothing to check. Just a copy over. + things[0] = scales[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]]{{( nsw)?}} [[TYPE]] [[val5]], [[val1]] + things[1] += things[5]; + + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]]{{( nsw)?}} [[TYPE]] [[val2]], [[val6]] + things[2] -= things[6]; + + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]]{{( nsw)?}} [[TYPE]] [[val7]], [[val3]] + things[3] *= things[7]; + + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]]{{( nsw)?}} [[TYPE]] [[val4]], [[val8]] + things[4] /= things[8]; + +#ifdef DBL + things[5] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + + // CHECK: [[res6:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl1]], [[val6]] + things[6] += scales[1]; + + // CHECK: [[res7:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val7]], [[scl2]] + things[7] -= scales[2]; + + // CHECK: [[res8:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl3]], [[val8]] + things[8] *= scales[3]; + + // CHECK: [[res9:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val9]], [[scl4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[scl0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] 
undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[val10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + +} + +// Test arithmetic operators. +VTYPE arithmetic(inout VTYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + res[0] = +things[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle 
[[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[res1:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + res[1] = -things[0]; + + // CHECK: [[res2:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%.*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%.*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifdef DBL + res[6] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val9]], [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val10]], [[NEG1]] + res[10] = --things[10]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[val1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[val3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[val4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[val5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[val6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], 
[[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // Postincrement/decrements get the original value. + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[val7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[val8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
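+// scales[] supplies the TYPE operand: it appears on the right of +, -, *, and /
+// and on the left of +, -, and *, so both operand orders of the mixed
+// VTYPE/TYPE arithmetic are covered by the checks below.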
+VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle 
[[SclHdl]], i32 [[SclIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res0:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl0]], [[val0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val1]], [[scl1]] + res[1] = things[1] - scales[1]; + + // CHECK: [[res2:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl2]], [[val2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[res3:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val3]], [[scl3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[res4:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl4]], [[val4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[scl5]], [[val5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[res6:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl6]], [[val6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + + +// Test logic operators. 
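+// bool1 results are stored as i32, so each check below expects an i1 to be
+// computed (icmp/fcmp, and, or, or select) and then zext'ed back to i32
+// before it is written with rawBufferStore.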
+// Only permissable in pre-HLSL2021 +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10] { + bool1 res[10]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i8 1, i32 [[IALN]]) + // CHECK: [[ival0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i8 1, i32 [[IALN]]) + // CHECK: [[ival1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i8 1, i32 [[IALN]]) + // CHECK: [[ival2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i8 1, i32 [[IALN]]) + // CHECK: [[ival3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i8 1, i32 [[IALN]]) + // CHECK: [[ival4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i8 1, i32 [[IALN]]) + // CHECK: [[ival5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 
[[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[bres0:%.*]] = icmp eq i32 [[ival0]], 0 + // CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + res[0] = !truth[0]; + + // CHECK: [[res1:%.*]] = or i32 [[ival2]], [[ival1]] + // CHECK: [[bres1:%.*]] = icmp ne i32 [[res1]], 0 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[ival2]], 0 + // CHECK: [[bval3:%.*]] = icmp ne i32 [[ival3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[bval4:%.*]] = icmp ne i32 [[ival4]], 0 + // CHECK: [[bval5:%.*]] = icmp ne i32 [[ival5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], i32 [[res0]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], i32 [[res1]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], i32 [[res2]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], i32 [[res3]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], i32 [[res4]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], i32 [[res5]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], i32 [[res6]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], i32 [[res7]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], 
i32 [[ResIx]], i32 [[BOFF8]], i32 [[res8]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], i32 [[res9]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +VTYPE index(VTYPE things[11], int i)[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 5 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1:%.*]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val1]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val3]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val4]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val5]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 
[[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val6]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val9]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val10]], [[TYPE]]* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2:%.*]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]], align [[ALN]] + res[0] = 0; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 [[Ix]] + // CHECK: store [[TYPE]] [[POS1]], [[TYPE]]* [[adr]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 2 + // CHECK: store [[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]], [[TYPE]]* [[adr]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 [[Ix]] + // CHECK: [[vali:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[vali]], [[TYPE]]* [[adr]] + res[4] = things[i]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, 
i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[TWO]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[vali]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
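+// Integer element types only (guarded by INT). Shift amounts are masked first
+// (the `and` on the shift operand), and right shifts are checked as lshr under
+// UNSIG and ashr under SIG to distinguish unsigned from signed types.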
+void bittwiddlers(inout VTYPE things[13]) { + // INT: [[ValIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // INT: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // INT: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // INT: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // INT: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // INT: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // INT: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // INT: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // INT: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // INT: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // INT: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], i8 1, i32 [[ALN]]) + // INT: [[val11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], i8 1, i32 [[ALN]]) + // INT: [[val12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor [[TYPE]] [[val1]], -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or [[TYPE]] [[val3]], [[val2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and [[TYPE]] [[val4]], [[val3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor [[TYPE]] [[val5]], [[val4]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and [[TYPE]] [[val6]] + // INT: [[res4:%[0-9]*]] = shl [[TYPE]] [[val5]], [[shv6]] + things[4] 
= things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and [[TYPE]] [[val7]] + // UNSIG: [[res5:%[0-9]*]] = lshr [[TYPE]] [[val6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr [[TYPE]] [[val6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or [[TYPE]] [[val8]], [[val6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and [[TYPE]] [[val9]], [[val7]] + things[7] &= things[9]; + + // INT: [[res8:%[0-9]*]] = xor [[TYPE]] [[val10]], [[val8]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and [[TYPE]] [[val11]] + // INT: [[res9:%[0-9]*]] = shl [[TYPE]] [[val9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and [[TYPE]] [[val12]] + // UNSIG: [[res10:%[0-9]*]] = lshr [[TYPE]] [[val10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr [[TYPE]] [[val10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], [[TYPE]] [[val11]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], [[TYPE]] [[val12]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl index c366261406..44c9be17d4 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -1,51 +1,23 @@ -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. +// Test relevant operators on vec1s in 6.9 to ensure they continue to be treated as scalars. + +#define VTYPE vector // Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. // CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] // CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } -RWStructuredBuffer buf; - -export void assignments(inout TYPE things[10], TYPE scales[10]); -export TYPE arithmetic(inout TYPE things[11])[11]; -export bool logic(bool truth[10], TYPE consequences[10])[10]; -export TYPE index(TYPE things[10], int i, TYPE val)[10]; - -struct Interface { - TYPE assigned[10]; - TYPE arithmeticked[11]; - bool logicked[10]; - TYPE indexed[10]; - TYPE scales[10]; -}; - -#if 0 -// Requires vector loading support. Enable when available. -RWStructuredBuffer Input; -RWStructuredBuffer Output; - -TYPE g_val; - -[shader("compute")] -[numthreads(8,1,1)] -void main(uint GI : SV_GroupIndex) { - assignments(Output[GI].assigned, Input[GI].scales); - Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); - Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); - Output[GI].indexed = index(Input[GI].indexed, GI, g_val); -} -#endif +RWStructuredBuffer buf; // A mixed-type overload to test overload resolution and mingle different vector element types in ops // Test assignment operators. 
// CHECK-LABEL: define void @"\01?assignments -export void assignments(inout TYPE things[10]) { +export void assignments(inout VTYPE things[10]) { // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 @@ -111,8 +83,8 @@ export void assignments(inout TYPE things[10]) { // Test arithmetic operators. // CHECK-LABEL: define void @"\01?arithmetic -export TYPE arithmetic(inout TYPE things[11])[11] { - TYPE res[11]; +export VTYPE arithmetic(inout VTYPE things[11])[11] { + VTYPE res[11]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 @@ -226,7 +198,7 @@ export TYPE arithmetic(inout TYPE things[11])[11] { // Test logic operators. // Only permissable in pre-HLSL2021 // CHECK-LABEL: define void @"\01?logic -export bool logic(bool truth[10], TYPE consequences[10])[10] { +export bool logic(bool truth[10], VTYPE consequences[10])[10] { bool res[10]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] @@ -332,9 +304,9 @@ static const int Ix = 2; // Test indexing operators // CHECK-LABEL: define void @"\01?index -export TYPE index(TYPE things[10], int i)[10] { +export VTYPE index(VTYPE things[10], int i)[10] { // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] - TYPE res[10]; + VTYPE res[10]; // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] @@ -375,7 +347,7 @@ export TYPE index(TYPE things[10], int i)[10] { #ifdef INT // Test bit twiddling operators. // INT-LABEL: define void @"\01?bittwiddlers -export void bittwiddlers(inout TYPE things[13]) { +export void bittwiddlers(inout VTYPE things[13]) { // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl index ed7a2bff25..ba76eca619 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -48,24 +48,6 @@ struct Interface { TYPE scales[10]; }; -#if 0 -// Requires vector loading support. Enable when available. -RWStructuredBuffer Input; -RWStructuredBuffer Output; - -TYPE g_val; - -[shader("compute")] -[numthreads(8,1,1)] -void main(uint GI : SV_GroupIndex) { - assignments(Output[GI].assigned, Input[GI].scales); - Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); - Output[GI].scarithmeticked = scarithmetic(Input[GI].scarithmeticked, Input[GI].scales); - Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); - Output[GI].indexed = index(Input[GI].indexed, GI, g_val); -} -#endif - // A mixed-type overload to test overload resolution and mingle different vector element types in ops // Test assignment operators. 
// CHECK-LABEL: define void @"\01?assignments diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll new file mode 100644 index 0000000000..f9a9b3d677 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll @@ -0,0 +1,478 @@ +; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-scalarize-vector-load-stores -S | FileCheck %s + +; Verify that scalarize vector load stores pass will convert raw buffer vector operations +; into the equivalent collection of scalar load store calls. +; Sourced from buffer-load-stors-sm69.hlsl. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v17f32 = type { <17 x float>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%"class.StructuredBuffer >" = type { <17 x float> } +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <17 x float> } +%"class.ConsumeStructuredBuffer >" = type { <17 x float> } +%"class.AppendStructuredBuffer >" = type { <17 x float> } + +@"\01?RoByBuf@@3UByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwByBuf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 + +define void @main() { +bb: + %tmp = load %dx.types.Handle, %dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A", align 4 + %tmp2 = load %dx.types.Handle, %dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp3 = load %dx.types.Handle, %dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp4 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp5 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A", align 4 + %tmp6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %tmp7 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp5) + %tmp8 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: 
[[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp9 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i32 4) + %tmp10 = extractvalue %dx.types.ResRet.v17f32 %tmp9, 0 + %tmp11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp1) + %tmp12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 11, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] 
= extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp13 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i32 4) + %tmp14 = 
extractvalue %dx.types.ResRet.v17f32 %tmp13, 0 + %tmp15 = fadd fast <17 x float> %tmp14, %tmp10 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp15, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp15, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp15, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp15, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp15, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp15, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp15, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp15, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[ix2:%.*]] = add i32 %80, 16 + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp15, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp15, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp15, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp15, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[ix3:%.*]] = add i32 %85, 16 + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp15, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp15, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp15, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp15, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[ix4:%.*]] = add i32 %90, 16 + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp15, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, <17 x float> %tmp15, i32 4) + %tmp16 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp4) + %tmp17 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 4108, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 
[[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp18 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i32 4) + %tmp19 = extractvalue %dx.types.ResRet.v17f32 %tmp18, 0 + %tmp20 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 1, i8 0, i32 undef) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = 
extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp21 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i32 4) + %tmp22 = extractvalue %dx.types.ResRet.v17f32 %tmp21, 0 + %tmp23 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp) + %tmp24 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 12, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp25 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i32 4) + %tmp26 = extractvalue %dx.types.ResRet.v17f32 %tmp25, 0 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp27 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i32 4) + %tmp28 = extractvalue %dx.types.ResRet.v17f32 %tmp27, 0 + %tmp29 = fadd fast <17 x float> %tmp22, %tmp19 + %tmp30 = fadd fast <17 x float> %tmp29, %tmp26 + %tmp31 = fadd fast <17 x float> %tmp30, %tmp28 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp31, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp31, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp31, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp31, 
i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp31, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp31, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp31, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp31, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp31, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp31, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp31, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp31, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp31, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp31, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp31, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp31, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp31, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, <17 x float> %tmp31, i32 4) + %tmp32 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp3) + %tmp33 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp32, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp34 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp33, i8 -1) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp35 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i32 4) + %tmp36 = extractvalue %dx.types.ResRet.v17f32 %tmp35, 0 + %tmp37 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp2) + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp37, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp39 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp38, i8 1) + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> [[vec16]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> [[vec16]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> [[vec16]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> [[vec16]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> [[vec16]], i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> [[vec16]], i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> [[vec16]], i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> [[vec16]], i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: 
[[val8:%.*]] = extractelement <17 x float> [[vec16]], i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> [[vec16]], i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> [[vec16]], i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> [[vec16]], i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> [[vec16]], i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> [[vec16]], i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> [[vec16]], i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> [[vec16]], i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> [[vec16]], i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, <17 x float> %tmp36, i32 4) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0 +declare %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare void @dx.op.rawBufferVectorStore.v17f32(i32, %dx.types.Handle, i32, i32, <17 x float>, i32) #2 +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!13} +!dx.entryPoints = !{!17, !19} + +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{!4, !8, null, null} +!4 = !{!5, !6} +!5 = !{i32 0, %struct.ByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A" to %struct.ByteAddressBuffer*), !"RoByBuf", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{i32 1, %"class.StructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.StructuredBuffer >"*), !"RoStBuf", i32 0, i32 2, i32 1, i32 12, i32 0, !7} +!7 = !{i32 1, i32 68} +!8 = !{!9, !10, !11, !12} +!9 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"RwByBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!10 = !{i32 1, %"class.RWStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.RWStructuredBuffer >"*), !"RwStBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7} +!11 = !{i32 2, %"class.ConsumeStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.ConsumeStructuredBuffer >"*), !"CnStBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!12 = !{i32 3, %"class.AppendStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" 
to %"class.AppendStructuredBuffer >"*), !"ApStBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!13 = !{i32 1, void ()* @main, !14} +!14 = !{!15} +!15 = !{i32 0, !16, !16} +!16 = !{} +!17 = !{null, !"", null, !3, !18} +!18 = !{i32 0, i64 8589934608} +!19 = !{void ()* @main, !"main", !20, null, !24} +!20 = !{!21, null, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 5, i8 0, !23, i8 0, i32 2, i8 1, i32 0, i8 0, null} +!23 = !{i32 0, i32 1} +!24 = !{i32 8, i32 1, i32 5, !25} +!25 = !{i32 0} diff --git a/tools/clang/test/DXILValidation/load-store-validation.hlsl b/tools/clang/test/DXILValidation/load-store-validation.hlsl new file mode 100644 index 0000000000..d4e5e29db8 --- /dev/null +++ b/tools/clang/test/DXILValidation/load-store-validation.hlsl @@ -0,0 +1,74 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of various invalid load/store parameters. +// It is used to generate LitDXILValidation/load-store-validation.ll using `dxc -T ps_6_9`. +// Output is modified to trigger various validation errors. + +Texture1D<float4> Tex; +RWTexture1D<float4> RwTex; +SamplerState Samp; + +StructuredBuffer<float4> VecBuf; +StructuredBuffer<float> ScalBuf; +ByteAddressBuffer BaBuf; + +RWStructuredBuffer<float4> OutVecBuf; +RWStructuredBuffer<float> OutScalBuf; +RWByteAddressBuffer OutBaBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(int i : IX) : SV_Target { + // Texture provides some invalid handles to plug in. + float4 TexVal = Tex.Sample(Samp, i); + RwTex[0] = TexVal; + + // For invalid RC on Load (and inevitably invalid RK). + float BadRCLd = ScalBuf[0]; + // For invalid RK on Load. + float BadRKLd = ScalBuf[1]; + // For non-constant alignment on Load. + float BadAlnLd = ScalBuf[2]; + // For undefined offset on Structured Buffer Load. + float BadStrOffLd = ScalBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Load. + float BadBabOffLd = BaBuf.Load<float>(0); + + // For invalid RC on Vector Load (and inevitably invalid RK). + float4 BadRCVcLd = VecBuf[0]; + // For invalid RK on Vector Load. + float4 BadRKVcLd = VecBuf[1]; + // For non-constant alignment on Vector Load. + float4 BadAlnVcLd = VecBuf[2]; + // For undefined offset on Structured Buffer Vector Load. + float4 BadStrOffVcLd = VecBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + float4 BadBabOffVcLd = BaBuf.Load<float4>(4); + + // For Store to non-UAV. + OutScalBuf[0] = BadRCLd; + // For invalid RK on Store. + OutScalBuf[1] = BadRKLd; + // For non-constant alignment on Store. + OutScalBuf[2] = BadAlnLd; + // For undefined offset on Structured Buffer Store. + OutScalBuf[3] = BadStrOffLd; + // For undefined value Store. + OutScalBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Store. + OutBaBuf.Store(0, BadBabOffLd); + + // For Vector Store to non-UAV. + OutVecBuf[0] = BadRCVcLd; + // For invalid RK on Vector Store. + OutVecBuf[1] = BadRKVcLd; + // For non-constant alignment on Vector Store. + OutVecBuf[2] = BadAlnVcLd; + // For undefined offset on Structured Buffer Vector Store. + OutVecBuf[3] = BadStrOffVcLd; + // For undefined value Vector Store. + OutVecBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Store. 
+ OutBaBuf.Store(4, BadBabOffVcLd); + + return TexVal; +} + diff --git a/tools/clang/test/DXILValidation/vector-validation.hlsl b/tools/clang/test/DXILValidation/vector-validation.hlsl new file mode 100644 index 0000000000..5d6a5cd4a2 --- /dev/null +++ b/tools/clang/test/DXILValidation/vector-validation.hlsl @@ -0,0 +1,14 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of disallowed 6.9 features in previous shader models. +// It is used to generate LitDXILValidation/vector-validation.ll using `dxc -T vs_6_9`. +// Output is modified to have shader model 6.8 instead. + +RWStructuredBuffer<float4> VecBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(float val : VAL) : SV_Position { + float4 vec = VecBuf[1]; + VecBuf[0] = val; + return vec[2]; +} + diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll new file mode 100644 index 0000000000..34b2f6b602 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -0,0 +1,229 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Ensure proper validation errors are produced for invalid parameters to load and store operations. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.Texture1D >" = type { <4 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.StructuredBuffer >" = type { <4 x float> } +%"class.StructuredBuffer" = type { float } +%struct.ByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%struct.SamplerState = type { i32 } + +; Unfortunately, the validation errors come in weird orders. +; Inlining them isn't helpful, so we'll just dump them all here. +; Inline comments, variable names, and notes should help find the corresponding source. + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. 
+; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at '%badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at '%badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' + +define void @main() { +bb: + %tmp = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + %tmp1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + %tmp2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %tmp3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + %tmp4 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + %tmp5 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + %tmp6 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) + %tmp7 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 3 }, i32 0, i1 false) + %tmp8 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 0, i1 false) + %ix = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %texIx = sitofp i32 %ix to float + %tex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 1, i32 1033 }) + %samp = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 14, i32 0 }) + %tmp10 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, %dx.types.Handle %tex, %dx.types.Handle %samp, float %texIx, float undef, float undef, float undef, i32 0, i32 undef, i32 undef, float undef) + %tmp11 = extractvalue %dx.types.ResRet.f32 %tmp10, 0 + %tmp12 = extractvalue %dx.types.ResRet.f32 %tmp10, 1 + %tmp13 = extractvalue %dx.types.ResRet.f32 %tmp10, 2 + %tmp14 = extractvalue %dx.types.ResRet.f32 %tmp10, 3 + %rwTex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp8, %dx.types.ResourceProperties { i32 4097, i32 1033 }) + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %rwTex, i32 0, i32 undef, i32 undef, float %tmp11, float %tmp12, float %tmp13, float %tmp14, i8 15) + %scalBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp4, %dx.types.ResourceProperties { i32 12, i32 4 }) + ; Invalid RC on Load (and inevitably invalid RK). 
+ %badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4) + %badRC = extractvalue %dx.types.ResRet.f32 %badRCLd, 0 + ; Invalid RK on Load. + %badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4) + %badRK = extractvalue %dx.types.ResRet.f32 %badRKLd, 0 + ; Non-constant alignment on Load. + %badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix) + %badAln = extractvalue %dx.types.ResRet.f32 %badAlnLd, 0 + ; Undefined offset on Structured Buffer Load. + %badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4) + %badStrOff = extractvalue %dx.types.ResRet.f32 %badStrOffLd, 0 + %baBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp3, %dx.types.ResourceProperties { i32 11, i32 0 }) + ; Defined (and therefore invalid) offset on Byte Address Buffer Load. + %badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4) + %badBabOff = extractvalue %dx.types.ResRet.f32 %badBabOffLd, 0 + + %vecBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp5, %dx.types.ResourceProperties { i32 12, i32 16 }) + ; Invalid RC on Vector Load (and inevitably invalid RK). + %badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4) + %badRCVc = extractvalue %dx.types.ResRet.v4f32 %badRCVcLd, 0 + ; Invalid RK on Vector Load. + %badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4) + %badRKVc = extractvalue %dx.types.ResRet.v4f32 %badRKVcLd, 0 + ; Non-constant alignment on Vector Load. + %badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix) + %badAlnVc = extractvalue %dx.types.ResRet.v4f32 %badAlnVcLd, 0 + ; Undefined offset on Structured Buffer Vector Load. + %badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4) + %badStrOffVc = extractvalue %dx.types.ResRet.v4f32 %badStrOffVcLd, 0 + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + %badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4) + %badBabOffVc = extractvalue %dx.types.ResRet.v4f32 %badBabOffVcLd, 0 + + ; Store to non-UAV. + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4) + ; Invalid RK on Store. + %tmp39 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4) + ; Non-constant alignment on Store. 
+ %tmp40 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix) + ; Undefined offset on Structured Buffer Store. + %tmp41 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) + ; Undefined value Store. + %tmp42 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Store. + %tmp44 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4) + + ; Vector Store to non-UAV. + %tmp45 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %rwTex, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4) + ; Invalid RK on Vector Store. + %tmp46 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4) + ; Non-constant alignment on Vector Store. + %tmp47 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix) + ; Undefined offset on Structured Buffer Vector Store. + %tmp48 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4) + ; Undefined value Vector Store. + %tmp49 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Store. 
+ %tmp51 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4) + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %tmp11) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %tmp12) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %tmp13) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %tmp14) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #2 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 +declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #1 +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #0 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!18} +!dx.entryPoints = !{!19} + +!1 = !{i32 1, i32 9} +!2 = !{!"ps", i32 6, i32 9} +!3 = !{!4, !12, null, !16} +!4 = !{!5, !7, !9, !11} +!5 = !{i32 0, %"class.Texture1D >"* undef, !"", i32 0, i32 0, i32 1, i32 1, i32 0, !6} +!6 = !{i32 0, i32 9} +!7 = !{i32 1, %"class.StructuredBuffer >"* undef, !"", i32 0, i32 1, i32 1, i32 12, i32 0, !8} +!8 = !{i32 1, i32 16} +!9 = !{i32 2, %"class.StructuredBuffer"* undef, !"", i32 0, i32 2, i32 1, i32 12, i32 0, !10} +!10 = !{i32 1, i32 4} +!11 = !{i32 3, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 3, i32 1, i32 11, i32 0, null} +!12 = !{!13, !14, !15} +!13 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !8} +!14 = !{i32 1, %"class.RWStructuredBuffer"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !10} +!15 = !{i32 2, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 2, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{!17} +!17 = !{i32 0, %struct.SamplerState* undef, !"", i32 0, i32 0, i32 1, i32 0, null} +!18 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!19 = !{void ()* @main, !"main", !20, !3, !27} +!20 = !{!21, !24, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 4, i8 0, !23, i8 1, i32 1, i8 1, i32 0, i8 0, null} +!23 = !{i32 0} +!24 = !{!25} +!25 = !{i32 0, !"SV_Target", i8 9, i8 16, !23, i8 0, i32 1, i8 4, i32 0, i8 0, !26} +!26 = !{i32 3, i32 15} +!27 = !{i32 0, i64 8589934608} diff --git a/tools/clang/test/LitDXILValidation/vector-validation.ll b/tools/clang/test/LitDXILValidation/vector-validation.ll new file mode 100644 index 0000000000..74e8116e88 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/vector-validation.ll @@ -0,0 +1,78 @@ +; RUN: not %dxv %s 2>&1 | 
FileCheck %s + +; Confirm that 6.9 specific LLVM operations and DXIL intrinsics fail in 6.8 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } + +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%6 = insertelement <4 x float> undef, float %2, i32 0 +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%8 = extractelement <4 x float> %5, i32 2 +; CHECK: Function: main: error: Opcode RawBufferVectorLoad not valid in shader model vs_6_8. +; CHECK: note: at '%4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8)' +; CHECK: Function: main: error: Opcode RawBufferVectorStore not valid in shader model vs_6_8. +; CHECK: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4)' +; CHECK: Function: main: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: main: error: Function uses features incompatible with the shader model. +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4108, i32 16 }) + %4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8) + %5 = extractvalue %dx.types.ResRet.v4f32 %4, 0 + %6 = insertelement <4 x float> undef, float %2, i32 0 + %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4) + %8 = extractelement <4 x float> %5, i32 2 + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %8) + ret void +} + +declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #2 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind readonly } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!7} 
+!dx.entryPoints = !{!8} + +!1 = !{i32 1, i32 8} +!2 = !{!"vs", i32 6, i32 8} +!3 = !{null, !4, null, null} +!4 = !{!5} +!5 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6} +!6 = !{i32 1, i32 16} +!7 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!8 = !{void ()* @main, !"main", !9, !3, !17} +!9 = !{!10, !14, null} +!10 = !{!11} +!11 = !{i32 0, !"VAL", i8 9, i8 0, !12, i8 0, i32 1, i8 1, i32 0, i8 0, !13} +!12 = !{i32 0} +!13 = !{i32 3, i32 1} +!14 = !{!15} +!15 = !{i32 0, !"SV_Position", i8 9, i8 3, !12, i8 4, i32 1, i8 4, i32 0, i8 0, !16} +!16 = !{i32 3, i32 15} +!17 = !{i32 0, i64 8590000144} + diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl index 42eb6b077c..54c85191da 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl @@ -3,6 +3,8 @@ #define TYPE float #define NUM 5 +StructuredBuffer<vector<TYPE, NUM> > sbuf; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + struct LongVec { float4 f; vector<TYPE, NUM> vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index f69b0be204..01f24e0227 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1506,21 +1506,23 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) { } TEST_F(ValidationTest, StructBufLoadCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 8)", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 undef)", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 8)", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 undef)", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, StructBufStoreCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 undef", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 undef", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, TypedBufRetType) { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 5eb35fb52a..691c3ba58f 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -479,7 +479,7 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Dot" for ( i - ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore".split( + ) in 
"CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore,RawBufferVectorLoad,RawBufferVectorStore".split( "," ): self.name_idx[i].category = "Resources" @@ -606,6 +606,8 @@ def populate_categories_and_models(self): for i in "RawBufferLoad,RawBufferStore".split(","): self.name_idx[i].shader_model = 6, 2 self.name_idx[i].shader_model_translated = 6, 0 + for i in "RawBufferVectorLoad,RawBufferVectorStore".split(","): + self.name_idx[i].shader_model = 6, 9 for i in "DispatchRaysIndex,DispatchRaysDimensions".split(","): self.name_idx[i].category = "Ray Dispatch Arguments" self.name_idx[i].shader_model = 6, 3 @@ -5778,6 +5780,84 @@ def UFI(name, **mappings): # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + # Long Vectors + self.add_dxil_op( + "RawBufferVectorLoad", + next_op_idx, + "RawBufferVectorLoad", + "reads from a raw buffer and structured buffer", + "hfwidl<", + "ro", + [ + db_dxil_param(0, "$r", "", "the loaded value"), + db_dxil_param(2, "res", "buf", "handle of Raw Buffer to load from"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param( + 5, + "i32", + "alignment", + "relative load access alignment", + is_const=True, + ), + ], + counters=("tex_load",), + ) + next_op_idx += 1 + + self.add_dxil_op( + "RawBufferVectorStore", + next_op_idx, + "RawBufferVectorStore", + "writes to a RWByteAddressBuffer or RWStructuredBuffer", + "hfwidl<", + "", + [ + db_dxil_param(0, "v", "", ""), + db_dxil_param(2, "res", "uav", "handle of UAV to store to"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param(5, "$o", "value0", "value"), + db_dxil_param( + 6, + "i32", + "alignment", + "relative store access alignment", + is_const=True, + ), + ], + counters=("tex_store",), + ) + next_op_idx += 1 + + # End of DXIL 1.9 opcodes. + # NOTE!! Update and uncomment when DXIL 1.9 opcodes are finalized: + # self.set_op_count_for_version(1, 9, next_op_idx) + # assert next_op_idx == NNN, ( + # "NNN is expected next operation index but encountered %d and thus opcodes are broken" + # % next_op_idx + # ) + # Set interesting properties. 
self.build_indices() for ( @@ -6385,6 +6465,12 @@ def add_pass(name, type_name, doc, opts): "DXIL Lower createHandleForLib", [], ) + add_pass( + "hlsl-dxil-scalarize-vector-load-stores", + "DxilScalarizeVectorLoadStores", + "DXIL scalarize vector load/stores", + [], + ) add_pass( "hlsl-dxil-cleanup-dynamic-resource-handle", "DxilCleanupDynamicResourceHandle", @@ -7607,11 +7693,15 @@ def build_valrules(self): ) self.add_valrule( "Instr.CoordinateCountForRawTypedBuf", - "raw/typed buffer don't need 2 coordinates.", + "raw/typed buffer offset must be undef.", + ) + self.add_valrule( + "Instr.ConstAlignForRawBuf", + "Raw Buffer alignment value must be a constant.", ) self.add_valrule( "Instr.CoordinateCountForStructBuf", - "structured buffer require 2 coordinates.", + "structured buffer requires defined index and offset coordinates.", ) self.add_valrule( "Instr.MipLevelForGetDimension", From 9e9184426c9103a96ec8da2fe4da290f467d4486 Mon Sep 17 00:00:00 2001 From: Chris B Date: Mon, 7 Apr 2025 14:22:34 -0500 Subject: [PATCH 72/88] [NFC] containsLongVector -> ContainsLongVector (#7255) I provided feedback during code review that this function should be named following LLVM conventions. That feedback did not account for the fact that SemaHLSL is otherwise consistent using CamelCase instead of camelCase naming. This corrects my error by renaming to match the consistent style in SemaHLSL.h. I've also updated the parameter naming in the source file to conform to LLVM style since I was in the area anyways. --- tools/clang/include/clang/Sema/SemaHLSL.h | 2 +- tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 24 ++++++++++----------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index d6103b55e6..ac6e08b3fa 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool containsLongVector(clang::QualType qt); +bool ContainsLongVector(clang::QualType); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 0f27de8291..36ab55ea10 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (containsLongVector(Payload->getType())) { + if (ContainsLongVector(Payload->getType())) { const unsigned PayloadParametersIdx = 10; S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << PayloadParametersIdx; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 027d7d3cbc..6796badcb6 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5529,7 +5529,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (containsLongVector(argType)) { + if (ContainsLongVector(argType)) { const unsigned ConstantBuffersOrTextureBuffersIdx = 0; m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << ConstantBuffersOrTextureBuffersIdx; @@ -5637,7 +5637,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = 
arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned TessellationPatchesIDx = 1; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -5656,7 +5656,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned GeometryStreamsIdx = 2; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -12545,14 +12545,14 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::containsLongVector(QualType qt) { - if (qt.isNull() || qt->isDependentType()) +bool hlsl::ContainsLongVector(QualType QT) { + if (QT.isNull() || QT->isDependentType()) return false; - while (const ArrayType *Arr = qt->getAsArrayTypeUnsafe()) - qt = Arr->getElementType(); + while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) + QT = Arr->getElementType(); - if (CXXRecordDecl *Decl = qt->getAsCXXRecordDecl()) { + if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { if (!Decl->isCompleteDefinition()) return false; return Decl->hasHLSLLongVector(); @@ -15201,7 +15201,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (containsLongVector(qt)) { + if (ContainsLongVector(qt)) { unsigned CbuffersOrTbuffersIdx = 4; Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << CbuffersOrTbuffersIdx; @@ -16099,7 +16099,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (containsLongVector(Arg->getType())) { + if (ContainsLongVector(Arg->getType())) { const unsigned UserDefinedStructParameterIdx = 5; S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << UserDefinedStructParameterIdx; @@ -16842,14 +16842,14 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. 
for (const auto *param : FD->params()) { - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned EntryFunctionParametersIdx = 6; S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionParametersIdx; } } - if (containsLongVector(FD->getReturnType())) { + if (ContainsLongVector(FD->getReturnType())) { const unsigned EntryFunctionReturnIdx = 7; S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionReturnIdx; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index c562ee8d52..abca7cbf86 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -710,14 +710,14 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned PatchConstantFunctionParametersIdx = 8; self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << PatchConstantFunctionParametersIdx; } - if (containsLongVector(pPatchFnDecl->getReturnType())) { + if (ContainsLongVector(pPatchFnDecl->getReturnType())) { const unsigned PatchConstantFunctionReturnIdx = 9; self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) From dc4a2b6e910f47ef51cc482c648f105e866f58f7 Mon Sep 17 00:00:00 2001 From: nopandbrk <202358470+nopandbrk@users.noreply.github.com> Date: Mon, 7 Apr 2025 15:13:08 -0700 Subject: [PATCH 73/88] [PIX] Add a pass for PIX to log missing NonUniformResourceIndex usage into a UAV (#7272) This is a pass to add instructions to determine missing usage of the NonUniformResourceIndex qualifier when dynamically indexing resources. The instruction numbers will be written out to a UAV for later ingestion by PIX to present a view of the output. 
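For illustration, a minimal HLSL sketch of the pattern this pass flags (the
names and semantics here are assumed for the example; it is not excerpted from
the diff below):

  // Dynamic resource index with no NonUniformResourceIndex wrapper: the pass
  // inserts a WaveActiveAllEqual check on 'i' and atomically ORs the outcome
  // into a PIX-owned UAV, keyed by the instruction number of the handle.
  Texture2D texs[] : register(t0);
  float4 PSMain(float2 uv : TEXCOORD0,
                nointerpolation uint i : INDEX0) : SV_Target {
    return texs[i].Load(int3(0, 0, 0));
    // texs[NonUniformResourceIndex(i)] would not be instrumented.
  }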
--- include/dxc/DxilPIXPasses/DxilPIXPasses.h | 3 + lib/DxilPIXPasses/CMakeLists.txt | 1 + ...NonUniformResourceIndexInstrumentation.cpp | 173 ++++++++++++++ .../DxilShaderAccessTracking.cpp | 89 +------ lib/DxilPIXPasses/PixPassHelpers.cpp | 84 +++++++ lib/DxilPIXPasses/PixPassHelpers.h | 7 +- tools/clang/unittests/HLSL/PixTest.cpp | 219 ++++++++++++++++++ utils/hct/hctdb.py | 6 + 8 files changed, 499 insertions(+), 83 deletions(-) create mode 100644 lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h index ad0ddfdfd2..5cc7c4aa50 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h +++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h @@ -27,6 +27,7 @@ ModulePass *createDxilDebugInstrumentationPass(); ModulePass *createDxilShaderAccessTrackingPass(); ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass(); ModulePass *createDxilPIXDXRInvocationsLogPass(); +ModulePass *createDxilNonUniformResourceIndexInstrumentationPass(); void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry &); void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry &); @@ -41,5 +42,7 @@ void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry &); void initializeDxilPIXAddTidToAmplificationShaderPayloadPass( llvm::PassRegistry &); void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry &); +void initializeDxilNonUniformResourceIndexInstrumentationPass( + llvm::PassRegistry &); } // namespace llvm diff --git a/lib/DxilPIXPasses/CMakeLists.txt b/lib/DxilPIXPasses/CMakeLists.txt index c36d11d559..67e77f17cd 100644 --- a/lib/DxilPIXPasses/CMakeLists.txt +++ b/lib/DxilPIXPasses/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMDxilPIXPasses PixPassHelpers.cpp DxilPIXAddTidToAmplificationShaderPayload.cpp DxilPIXDXRInvocationsLog.cpp + DxilNonUniformResourceIndexInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp new file mode 100644 index 0000000000..a442bfabed --- /dev/null +++ b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp @@ -0,0 +1,173 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNonUniformResourceIndexInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to add instrumentation to determine missing usage of the // +// NonUniformResourceIndex qualifier when dynamically indexing resources. // +// Used by PIX. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "PixPassHelpers.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DxilPIXPasses/DxilPIXPasses.h" +#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace hlsl; + +class DxilNonUniformResourceIndexInstrumentation : public ModulePass { + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilNonUniformResourceIndexInstrumentation() : ModulePass(ID) {} + StringRef getPassName() const override { + return "DXIL NonUniformResourceIndex Instrumentation"; + } + bool runOnModule(Module &M) override; +}; + +bool DxilNonUniformResourceIndexInstrumentation::runOnModule(Module &M) { + // This pass adds instrumentation for incorrect NonUniformResourceIndex usage + + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext &Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + hlsl::DxilResource *PixUAVResource = nullptr; + + UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + + // Use WaveActiveAllEqual to check if a dynamic index is uniform + Function *WaveActiveAllEqualFunc = HlslOP->GetOpFunc( + DXIL::OpCode::WaveActiveAllEqual, Type::getInt32Ty(Ctx)); + Constant *WaveActiveAllEqualOpCode = + HlslOP->GetI32Const((int32_t)DXIL::OpCode::WaveActiveAllEqual); + + // Atomic operation to use for writing to the result uav resource + Function *AtomicOpFunc = + HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant *AtomicBinOpcode = + HlslOP->GetU32Const((uint32_t)OP::OpCode::AtomicBinOp); + Constant *AtomicOr = HlslOP->GetU32Const((uint32_t)DXIL::AtomicBinOpCode::Or); + + std::map FunctionToUAVHandle; + + // This is the main pass that will iterate through all of the resources that + // are dynamically indexed. If not already marked NonUniformResourceIndex, + // then insert WaveActiveAllEqual to determine if the index is uniform + // and finally write to a UAV resource with the result. + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&](bool IsNonUniformIndex, Instruction *CreateHandle, + Value *IndexOperand) { + if (IsNonUniformIndex) { + // The NonUniformResourceIndex qualifier was used, continue. + return true; + } + + if (!PixUAVResource) { + PixUAVResource = + PIXPassHelpers::CreateGlobalUAVResource(DM, 0, "PixUAVResource"); + } + + CallInst *PixUAVHandle = nullptr; + Function *F = CreateHandle->getParent()->getParent(); + + const auto FunctionToUAVHandleIter = FunctionToUAVHandle.lower_bound(F); + + if ((FunctionToUAVHandleIter != FunctionToUAVHandle.end()) && + (FunctionToUAVHandleIter->first == F)) { + PixUAVHandle = FunctionToUAVHandleIter->second; + } else { + IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt()); + + PixUAVHandle = PIXPassHelpers::CreateHandleForResource( + DM, Builder, PixUAVResource, "PixUAVHandle"); + + FunctionToUAVHandle.insert(FunctionToUAVHandleIter, + {F, PixUAVHandle}); + } + + IRBuilder<> Builder(CreateHandle); + + uint32_t InstructionNumber = 0; + if (!pix_dxil::PixDxilInstNum::FromInst(CreateHandle, + &InstructionNumber)) { + DXASSERT_NOMSG(false); + } + + // The output UAV is treated as a bit array where each bit corresponds + // to an instruction number. This determines what byte offset to write + // our result to based on the instruction number. 
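+ // Worked example (illustrative): for a hypothetical instruction number 37,
+ // the byte offset is (37 / 32) * 4 == 4 and the bit position is 37 % 32 == 5,
+ // so the mask computed below is 1u << 5 == 0x20.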
+ const uint32_t InstructionNumByteOffset = + (InstructionNumber / 32u) * sizeof(uint32_t); + const uint32_t InstructionNumBitPosition = (InstructionNumber % 32u); + const uint32_t InstructionNumBitMask = 1u << InstructionNumBitPosition; + + Constant *UAVByteOffsetArg = + HlslOP->GetU32Const(InstructionNumByteOffset); + + CallInst *WaveActiveAllEqualCall = Builder.CreateCall( + WaveActiveAllEqualFunc, {WaveActiveAllEqualOpCode, IndexOperand}); + + // This takes the result of the WaveActiveAllEqual result and shifts + // it into the same bit position as the instruction number, followed + // by an xor to determine what to write to the UAV + Value *IsWaveEqual = + Builder.CreateZExt(WaveActiveAllEqualCall, Builder.getInt32Ty()); + Value *WaveEqualBitMask = + Builder.CreateShl(IsWaveEqual, InstructionNumBitPosition); + Value *FinalResult = + Builder.CreateXor(WaveEqualBitMask, InstructionNumBitMask); + + // Generate instructions to bitwise OR a UAV value corresponding + // to the instruction number and result of WaveActiveAllEqual. + // If WaveActiveAllEqual was false, we write a 1, otherwise a 0. + Builder.CreateCall( + AtomicOpFunc, + { + AtomicBinOpcode, // i32, ; opcode + PixUAVHandle, // %dx.types.Handle, ; resource handle + AtomicOr, // i32, ; binary operation code : + // EXCHANGE, IADD, AND, OR, XOR + // IMIN, IMAX, UMIN, UMAX + UAVByteOffsetArg, // i32, ; coordinate c0: byte offset + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + FinalResult // i32); value + }, + "UAVInstructionNumberBitSet"); + return true; + }); + + const bool modified = (PixUAVResource != nullptr); + + if (modified) { + DM.ReEmitDxilResources(); + + if (OSOverride != nullptr) { + formatted_raw_ostream FOS(*OSOverride); + FOS << "\nFoundDynamicIndexingNoNuri\n"; + } + } + + return modified; +} + +char DxilNonUniformResourceIndexInstrumentation::ID = 0; + +ModulePass *llvm::createDxilNonUniformResourceIndexInstrumentationPass() { + return new DxilNonUniformResourceIndexInstrumentation(); +} + +INITIALIZE_PASS(DxilNonUniformResourceIndexInstrumentation, + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + false, false) diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 4f4cc7c620..bd96d83965 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -795,87 +795,6 @@ DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle, return ret; } -static bool CheckForDynamicIndexing(OP *HlslOP, LLVMContext &Ctx, - DxilModule &DM) { - bool FoundDynamicIndexing = false; - - for (llvm::Function &F : DM.GetModule()->functions()) { - if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { - if (F.hasName()) { - if (F.getName().find("createHandleForLib") != StringRef::npos) { - auto FunctionUses = F.uses(); - for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { - auto &FunctionUse = *FI++; - auto FunctionUser = FunctionUse.getUser(); - auto instruction = cast(FunctionUser); - Value *resourceLoad = - instruction->getOperand(kCreateHandleForLibResOpIdx); - if (auto *load = cast(resourceLoad)) { - auto *resOrGep = load->getOperand(0); - if (isa(resOrGep)) { - FoundDynamicIndexing = true; - break; - } - } - } - } - } - } - if (FoundDynamicIndexing) { - break; - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFn = - HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, 
Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFn->user_begin(); - FI != CreateHandleFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = instruction->getOperand(kCreateHandleResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromBindingFn->user_begin(); - FI != CreateHandleFromBindingFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromBindingResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromHeapFn->user_begin(); - FI != CreateHandleFromHeapFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromHeapHeapIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - return FoundDynamicIndexing; -} - bool DxilShaderAccessTracking::runOnModule(Module &M) { // This pass adds instrumentation for shader access to resources @@ -887,7 +806,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { if (m_CheckForDynamicIndexing) { - bool FoundDynamicIndexing = CheckForDynamicIndexing(HlslOP, Ctx, DM); + bool FoundDynamicIndexing = false; + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&FoundDynamicIndexing](bool, Instruction *, Value *) { + FoundDynamicIndexing = true; + return false; + }); if (FoundDynamicIndexing) { if (OSOverride != nullptr) { diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index 69385ae048..65d9a660cc 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -512,6 +512,90 @@ unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, } } +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function &Visitor) { + OP *HlslOP = DM.GetOP(); + LLVMContext &Ctx = DM.GetModule()->getContext(); + + for (llvm::Function &F : DM.GetModule()->functions()) { + if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { + if (F.hasName()) { + if (F.getName().find("createHandleForLib") != StringRef::npos) { + auto FunctionUses = F.uses(); + for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { + auto &FunctionUse = *FI++; + auto FunctionUser = FunctionUse.getUser(); + auto instruction = cast(FunctionUser); + Value *resourceLoad = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleForLibResOpIdx); + if (auto *load = cast(resourceLoad)) { + auto *resOrGep = load->getOperand(0); + if (auto *gep = dyn_cast(resOrGep)) { + if (!Visitor(DxilMDHelper::IsMarkedNonUniform(gep), load, + gep->getOperand(2))) { + return; + } + } + } + } + } + } + } + } + + auto CreateHandleFn = + HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFn->user_begin(); + FI != CreateHandleFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = + instruction->getOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandle 
createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromBindingFn->user_begin(); + FI != CreateHandleFromBindingFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromBindingResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromBinding createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromHeapFn->user_begin(); + FI != CreateHandleFromHeapFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromHeapHeapIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromHeap createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } +} + #ifdef PIX_DEBUG_DUMP_HELPER static int g_logIndent = 0; diff --git a/lib/DxilPIXPasses/PixPassHelpers.h b/lib/DxilPIXPasses/PixPassHelpers.h index 4cd0e1a549..d7b0b40af8 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.h +++ b/lib/DxilPIXPasses/PixPassHelpers.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include "dxc/DXIL/DxilModule.h" @@ -16,7 +17,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -//#define PIX_DEBUG_DUMP_HELPER +// #define PIX_DEBUG_DUMP_HELPER #ifdef PIX_DEBUG_DUMP_HELPER #include "dxc/Support/Global.h" #endif @@ -82,4 +83,8 @@ void ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction( llvm::Instruction *Instr, llvm::Value *newValue, llvm::Type *newType); unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, unsigned UpStreamSVPosRow); +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function + &Visitor); } // namespace PIXPassHelpers diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index b97aa70c05..e337d2951c 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -153,6 +153,10 @@ class PixTest : public ::testing::Test { TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + TEST_METHOD(NonUniformResourceIndex_Resource) + TEST_METHOD(NonUniformResourceIndex_DescriptorHeap) + TEST_METHOD(NonUniformResourceIndex_Raytracing) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -444,6 +448,11 @@ class PixTest : public ::testing::Test { std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob); CComPtr RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob); CComPtr RunDxilPIXDXRInvocationsLog(IDxcBlob *blob); + std::vector + RunDxilNonUniformResourceIndexInstrumentation(IDxcBlob *blob, + std::string &outputText); + void TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult); void TestPixUAVCase(char const *hlsl, wchar_t const *model, wchar_t const *entry); std::string Disassemble(IDxcBlob *pProgram); @@ -671,6 +680,29 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { return pOptimizedModule; } +std::vector PixTest::RunDxilNonUniformResourceIndexInstrumentation( + IDxcBlob 
*blob, std::string &outputText) { + + CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::array Options = { + L"-opt-mod-passes", L"-dxil-dbg-value-to-dbg-declare", + L"-dxil-annotate-with-virtual-regs", + L"-hlsl-dxil-non-uniform-resource-index-instrumentation"}; + + CComPtr pOptimizedModule; + CComPtr pText; + VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + + outputText = BlobToUtf8(pText); + + const std::string disassembly = Disassemble(pOptimizedModule); + return Tokenize(disassembly, "\n"); +} + std::string PixTest::RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob) { CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); @@ -2983,6 +3015,193 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +uint32_t NuriGetWaveInstructionCount(const std::vector &lines) { + // This is the instruction we'll insert into the shader if we detect dynamic + // resource indexing + const char *const waveActiveAllEqual = "call i1 @dx.op.waveActiveAllEqual"; + + uint32_t instCount = 0; + for (const std::string &line : lines) { + instCount += line.find(waveActiveAllEqual) != std::string::npos; + } + return instCount; +} + +void PixTest::TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult) { + + for (const OptimizationChoice &choice : OptimizationChoices) { + const std::vector compilationOptions = {choice.Flag}; + + CComPtr compiledLib = + Compile(m_dllSupport, source, target, compilationOptions); + + std::string outputText; + const std::vector dxilLines = + RunDxilNonUniformResourceIndexInstrumentation(compiledLib, outputText); + + VERIFY_ARE_EQUAL(NuriGetWaveInstructionCount(dxilLines), expectedResult); + + bool foundDynamicIndexingNoNuri = false; + const std::vector outputTextLines = Tokenize(outputText, "\n"); + for (const std::string &line : outputTextLines) { + if (line.find("FoundDynamicIndexingNoNuri") != std::string::npos) { + foundDynamicIndexingNoNuri = true; + break; + } + } + + VERIFY_ARE_EQUAL((expectedResult != 0), foundDynamicIndexingNoNuri); + } +} + +TEST_F(PixTest, NonUniformResourceIndex_Resource) { + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint index = uv.x * uv.y; + return tex[index].Load(int3(0, 0, 0)); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x * uv.y; + return tex[NonUniformResourceIndex(i)].Load(int3(0, 0, 0)); +})x"; + + TestNuriCase(source, L"ps_6_0", 1); + TestNuriCase(sourceWithNuri, L"ps_6_0", 0); + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + TestNuriCase(source, L"ps_6_6", 1); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_DescriptorHeap) { + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[i]; + SamplerState dynResSampler = + SamplerDescriptorHeap[i]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + 
ResourceDescriptorHeap[NonUniformResourceIndex(i)]; + SamplerState dynResSampler = + SamplerDescriptorHeap[NonUniformResourceIndex(i)]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + TestNuriCase(source, L"ps_6_6", 2); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_Raytracing) { + + if (m_ver.SkipDxilVersion(1, 5)) { + return; + } + + const char *source = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[i][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[i][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[i1][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[i2][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + const char *sourceWithNuri = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[NonUniformResourceIndex(i1)][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[NonUniformResourceIndex(i2)][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + TestNuriCase(source, L"lib_6_5", 4); + TestNuriCase(sourceWithNuri, L"lib_6_5", 0); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 691c3ba58f..0008b752b1 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -6340,6 +6340,12 @@ def add_pass(name, type_name, doc, opts): "HLSL DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV", [{"n": "maxNumEntriesInLog", "t": "int", "c": 1}], ) + add_pass( + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "DxilNonUniformResourceIndexInstrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + [], + ) category_lib = "dxil_gen" From c940161bb3398ff988fafc343ed1623d4a3fad6c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:19:24 -0700 Subject: [PATCH 74/88] Bump cryptography from 43.0.1 to 44.0.1 in /utils/git (#7220) Bumps [cryptography](https://github.com/pyca/cryptography) from 43.0.1 to 44.0.1.
Changelog (sourced from cryptography's changelog):

44.0.1 - 2025-02-11

* Updated Windows, macOS, and Linux wheels to be compiled with OpenSSL 3.4.1.
* We now build ``armv7l`` ``manylinux`` wheels and publish them to PyPI.
* We now build ``manylinux_2_34`` wheels and publish them to PyPI.

44.0.0 - 2024-11-27

* BACKWARDS INCOMPATIBLE: Dropped support for LibreSSL < 3.9.
* Deprecated Python 3.7 support. Python 3.7 is no longer supported by the Python core team. Support for Python 3.7 will be removed in a future cryptography release.
* Updated Windows, macOS, and Linux wheels to be compiled with OpenSSL 3.4.0.
* macOS wheels are now built against the macOS 10.13 SDK. Users on older versions of macOS should upgrade, or they will need to build cryptography themselves.
* Enforce the RFC 5280 requirement that extended key usage extensions must not be empty.
* Added support for timestamp extraction to the cryptography.fernet.MultiFernet class.
* Relax the Authority Key Identifier requirements on root CA certificates during X.509 verification to allow fields permitted by RFC 5280 but forbidden by the CA/Browser BRs.
* Added support for cryptography.hazmat.primitives.kdf.argon2.Argon2id when using OpenSSL 3.2.0+.
* Added support for the cryptography.x509.Admissions certificate extension.
* Added basic support for PKCS7 decryption (including S/MIME 3.2) via pkcs7_decrypt_der, pkcs7_decrypt_pem, and pkcs7_decrypt_smime.

43.0.3 - 2024-10-18

* Fixed release metadata for ``cryptography-vectors``.

43.0.2 - 2024-10-18

* Fixed compilation when using LibreSSL 4.0.0.
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 utils/git/requirements_formatting.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt
index 06db8176c9..6f3e07dcf2 100644
--- a/utils/git/requirements_formatting.txt
+++ b/utils/git/requirements_formatting.txt
@@ -18,7 +18,7 @@ charset-normalizer==3.2.0
     # via requests
 click==8.1.7
     # via black
-cryptography==43.0.1
+cryptography==44.0.1
     # via pyjwt
 darker==1.7.2
     # via -r llvm/utils/git/requirements_formatting.txt.in

From 5d2fa929699b2a09a474796257b9709b1d48829f Mon Sep 17 00:00:00 2001
From: Chris B
Date: Wed, 9 Apr 2025 16:41:47 -0500
Subject: [PATCH 75/88] [SM6.9] Enable trivial native vector Dxil Operations
 plus a few (#7324)

This enables the generation of native vector DXIL Operations that are
"trivial", meaning they take only a single DXOp Call instruction to
implement, as well as a few others that either required only such a call
plus some LLVM operations or were of particular interest for other
reasons. This involves allowing the overloads by adding the vector
indication in hctdb, altering the lowering to maintain the vectors
instead of scalarizing them, and a few sundry changes to fix issues
along the way.

The "trivial" DXIL operations that return a different value from the
overload type had to be moved out of the way and given their own
lowering function so that the main function could generate vectors
conditional on the version and vector type. These will be added in a
later change.

While the long-vector-supporting intrinsics that weren't given this
treatment will continue to generate scalarized operations, some of them
needed some work as well. The dot product for float vectors longer than
4 had to take the integer fallback path, which required some small
modifications and a rename. Additionally, a heuristic for pow that
malfunctioned with too many elements had to have a limit placed on it.

Since the or()/and()/select() intrinsics translate directly to LLVM
ops, they can have their lowering scalarization removed; whatever
scalarization the current version still needs can be done by later
passes, as with other LLVM operators.

An issue with a special value used to represent unassigned dimensions
had to be addressed since new dimensions can exceed that value. It's
now MAX_INT.
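As a rough illustration of the intent (assumed entry point and DXIL mangling;
not excerpted from the tests added below):

  // With -T cs_6_9, a trivial elementwise intrinsic on a long vector keeps its
  // operand intact and lowers to a single native-vector DXIL call, roughly
  //   call <7 x float> @dx.op.unary.v7f32(i32 <Sqrt opcode>, <7 x float> %v)
  // whereas pre-6.9 targets emit one scalar @dx.op.unary.f32 call per element.
  vector<float, 7> ApplySqrt(vector<float, 7> v) { return sqrt(v); }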
Contributes to #7120, but I'd prefer to leave it open until all intrinsics are covered Primary work by @pow2clk Fixes #7297 & #7120 --------- Co-authored-by: Greg Roth --- lib/DXIL/DxilOperations.cpp | 140 +++--- lib/HLSL/HLOperationLower.cpp | 197 ++++---- tools/clang/lib/Sema/SemaHLSL.cpp | 8 +- .../hlsl/types/longvec-intrinsics.hlsl | 394 ++++++++++++++++ .../types/longvec-scalarized-intrinsics.hlsl | 115 +++++ ...ngvec-trivial-binary-float-intrinsics.hlsl | 69 +++ ...longvec-trivial-binary-int-intrinsics.hlsl | 116 +++++ ...longvec-trivial-scalarized-intrinsics.hlsl | 77 ++++ ...vec-trivial-tertiary-float-intrinsics.hlsl | 86 ++++ ...ngvec-trivial-tertiary-int-intrinsics.hlsl | 131 ++++++ ...ongvec-trivial-unary-float-intrinsics.hlsl | 83 ++++ .../longvec-trivial-unary-int-intrinsics.hlsl | 86 ++++ .../passes/longvec-intrinsics.hlsl | 186 ++++++++ .../CodeGenDXIL/passes/longvec-intrinsics.ll | 434 ++++++++++++++++++ utils/hct/hctdb.py | 24 +- 15 files changed, 1983 insertions(+), 163 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 0b4c7218d4..7047d9fe59 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -96,16 +96,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x7}}, - {{0x0}}}, // Overloads: hfd + {{0x407}}, + {{0x7}}}, // Overloads: hfd refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { @@ -459,12 +467,40 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, } } } -// Generates a DXIL operation over an overloaded type (Ty), returning a -// RetTy value; when Ty is a vector, it will replicate per-element operations -// into RetTy to rebuild it. + +// Creates a native vector call to for a "trivial" operation where only a single +// call instruction is needed. The overload and return types are the same vector +// type `Ty`. +// Utility objects `HlslOp` and `Builder` are used to create a call to the given +// `DxilFunc` with `RefArgs` arguments. +Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, + ArrayRef Args, Type *Ty, OP *OP, + IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) + return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); + return Builder.CreateCall(Func, Args); // Cannot add name to void. +} + +// Generates a DXIL operation with the overloaded type based on `Ty` and return +// type `RetTy`. 
When Ty is a vector, it will either generate per-element calls +// for each vector element and reconstruct the vector type from those results or +// operate on and return native vectors depending on vector size and the +// legality of the vector overload. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { + + // If supported and the overload type is a vector with more than 1 element, + // create a native vector operation. + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + OP::IsOverloadLegal(opcode, Ty)) { + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, + Builder); + } + + // Set overload type to the scalar type of `Ty` and generate call(s). Type *EltTy = Ty->getScalarType(); Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); @@ -484,20 +520,34 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } -Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src->getType(); +// Translate call that converts to a dxil unary operation with a different +// return type from the overload by passing the argument, explicit return type, +// and helper objects to the scalarizing unary dxil operation creation. +Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *, + bool &Translated) { + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = Src->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src}; + IRBuilder<> Builder(CI); + hlsl::OP *OP = &Helper.hlslOP; + Type *RetTy = CI->getType(); + Constant *OpArg = OP->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); + return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP, - Builder); +Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op, + IRBuilder<> &Builder) { + Type *Ty = Src->getType(); + + Constant *OpArg = Op->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; + + return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder); } Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, @@ -521,6 +571,9 @@ Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } +// Translate call that trivially converts to a dxil unary operation by passing +// argument, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -528,11 +581,13 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), - hlslOP, Builder); - return retVal; + + return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder); } +// Translate call that trivially converts to a dxil binary operation by passing +// arguments, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -547,6 +602,10 @@ Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } +// Translate call that trivially converts to a dxil trinary (aka tertiary) +// operation by passing arguments, return type, and helper objects to either +// scalarizing or native vector dxil operation creation depending on version +// and vector size. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -738,6 +797,12 @@ bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, } } + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. + Type *Ty = x->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) + return false; + APFloat powAPF = isa(pow) ? cast(pow)->getElementAsAPFloat(0) : // should be a splat value @@ -2019,7 +2084,7 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitHi = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); Constant *neg1 = Builder.getInt32(-1); @@ -2052,7 +2117,7 @@ Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitLo = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); return firstbitLo; } @@ -2431,17 +2496,22 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { - auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. 
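+// For example (illustrative), a 3-element dot product expands to:
+//   r = a[0] * b[0];  r = mad(a[1], b[1], r);  r = mad(a[2], b[2], r);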
+Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); - Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, + Value *Result; + if (Elt0->getType()->isFloatingPointTy()) + Result = Builder.CreateFMul(Elt0, Elt1); + else + Result = Builder.CreateMul(Elt0, Elt1); + for (unsigned Elt = 1; Elt < vecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(arg0, Elt); + Elt1 = Builder.CreateExtractElement(arg1, Elt); + Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, Builder); } @@ -2480,12 +2550,16 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { + Type *EltTy = Ty->getScalarType(); + if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); - } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); - } + + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_udot) + MadOpCode = DXIL::OpCode::UMad; + else if (EltTy->isFloatingPointTy()) + MadOpCode = DXIL::OpCode::FMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -3032,8 +3106,10 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (arg0Ty->getScalarType()->isFloatingPointTy()) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_umul) + MadOpCode = DXIL::OpCode::UMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -6150,20 +6226,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6171,20 +6235,8 @@ Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); 
IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateOr(x, y); } Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6194,21 +6246,8 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltCond = Builder.CreateExtractElement(cond, i); - Value *EltTrue = Builder.CreateExtractElement(t, i); - Value *EltFalse = Builder.CreateExtractElement(f, i); - Value *tmp = Builder.CreateSelect(EltCond, EltTrue, EltFalse); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateSelect(cond, t, f); } } // namespace @@ -6467,18 +6506,20 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, - {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, + {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, DXIL::OpCode::Countbits}, {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseX}, + {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineX}, - {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseY}, + {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineY}, {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 6796badcb6..72dd6d41aa 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6606,8 +6606,10 @@ bool HLSLExternalSource::MatchArguments( argTypes.clear(); const bool isVariadic = IsVariadicIntrinsicFunction(pIntrinsic); - static const UINT UnusedSize = 0xFF; - static const BYTE MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + static const uint32_t UnusedSize = std::numeric_limits::max(); + static const uint32_t MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + assert(MaxIntrinsicArgs < std::numeric_limits::max() && + "This should be a pretty small number"); #define CAB(cond, 
arg) \ { \ if (!(cond)) { \ @@ -6622,7 +6624,7 @@ bool HLSLExternalSource::MatchArguments( ArBasicKind ComponentType[MaxIntrinsicArgs]; // Component type for each argument, // AR_BASIC_UNKNOWN if unspecified. - UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32 + UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize // if unspecified. badArgIdx = MaxIntrinsicArgs; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..0b7f0d6b2f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -0,0 +1,394 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=2 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s + +// Test vector-enabled non-trivial intrinsics that take parameters of various types. + +RWByteAddressBuffer buf; +RWByteAddressBuffer ibuf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] 
[[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = ibuf.Load >(0); + vector sVec2 = ibuf.Load >(512); + vector sVec3 = ibuf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = ibuf.Load >(1025); + vector usVec2 = ibuf.Load >(1536); + vector usVec3 = ibuf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = ibuf.Load >(2049); + vector iVec2 = ibuf.Load >(2560); + vector iVec3 = ibuf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector 
uiVec1 = ibuf.Load >(3073); + vector uiVec2 = ibuf.Load >(3584); + vector uiVec3 = ibuf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = ibuf.Load >(4097); + vector lVec2 = ibuf.Load >(4608); + vector lVec3 = ibuf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = ibuf.Load >(5121); + vector ulVec2 = ibuf.Load >(5632); + vector ulVec3 = ibuf.Load >(6144); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 35, <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 36, <[[NUM]] x half> [[tmp]], <[[NUM]] x half> [[hvec3]]) ; FMin(a,b) + vector hRes = clamp(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 35, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 36, <[[NUM]] x float> [[tmp]], <[[NUM]] x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 35, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 36, <[[NUM]] x double> [[tmp]], <[[NUM]] x double> [[dvec3]]) ; FMin(a,b) + vector dRes = clamp(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 38, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[svec3]]) ; IMin(a,b) + vector sRes = clamp(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 39, <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 40, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[usvec3]]) ; UMin(a,b) + vector usRes = clamp(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // 
CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 38, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[ivec3]]) ; IMin(a,b) + vector iRes = clamp(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 39, <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 40, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[uivec3]]) ; UMin(a,b) + vector uiRes = clamp(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 38, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[lvec3]]) ; IMin(a,b) + vector lRes = clamp(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 39, <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 40, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[ulvec3]]) ; UMin(a,b) + vector ulRes = clamp(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], [[hvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x half> zeroinitializer, <[[NUM]] x half> [[fvec2]], [[fvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x float> zeroinitializer, <[[NUM]] x float> [[hvec1]], @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp]]) ; Exp(value) + hRes += exp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fmul fast <[[NUM]] x float> [[fvec1]], @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[hvec2]], [[hvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 7, <[[NUM]] x half> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x half> [[mul]], [[sub]] + hRes += smoothstep(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x float> [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 7, <[[NUM]] x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast 
<[[NUM]] x float> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fmul fast <[[NUM]] x half> [[hvec2]], [[fvec2]], [[hvec3]], [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <[[NUM]] x float> [[fvec1]] to <[[NUM]] x i32> + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], [[add]], [[shr]] to <[[NUM]] x float> + // CHECK: [[sel:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[i2f]], <[[NUM]] x float> zeroinitializer + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec2]] + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + fRes += lerp(fVec2, fVec3, fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x half> , [[hvec1]] + hRes += rcp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x float> , [[fvec1]] + fRes += rcp(fVec1); + + vector signs = 1; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(dVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i16> [[usvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(usVec2); + 
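// Note on the sign() checks in this test: for float and signed integer inputs, sign(x)
// appears to lower to two vector compares, (x > 0) and (x < 0), each zero-extended and
// then subtracted, i.e. zext(x > 0) - zext(x < 0). For unsigned inputs the negative
// compare is unnecessary, which is why those checks expect only a single icmp ne
// against zero followed by a zext.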
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: [[sub:%.*]] = sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i32> [[uivec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i64> [[ulvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(ulVec2); + + iRes += signs; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i16> [[svec1]], zeroinitializer + // CHECK: or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + sRes += or(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i16> [[svec3]], zeroinitializer + // CHECK: and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + sRes += and(sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]] + sRes += select(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + + ibuf.Store >(0, sRes); + ibuf.Store >(1024, usRes); + ibuf.Store >(2048, iRes); + ibuf.Store >(3072, uiRes); + ibuf.Store >(4096, lRes); + ibuf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..2ae3c92e85 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -0,0 +1,115 @@ +// RUN: %dxc -T lib_6_9 %s | FileCheck %s + +// Long vector tests for vec ops that scalarize to something more complex +// than a simple repetition of the same dx.op calls. 
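// For example, fmod() has no single DXIL opcode of its own: for a float vector it is
// expected to expand to vector-wide fdiv/fsub/FAbs/Frc operations (conceptually
// x - y * trunc(x / y)), and dot() to a chain of per-element FMad calls, as the checks
// below verify. These comments describe the expected shape of the expansion, not an
// exact instruction-for-instruction contract.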
+ +// CHECK-LABEL: test_atan2 +// CHECK: fdiv fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> %{{.*}}) ; Atan(value) +// CHECK: fadd fast <8 x float> %{{.*}}, %{{.*}}, +// CHECK: fcmp fast oeq <8 x float> +// CHECK: fcmp fast oge <8 x float> +// CHECK: fcmp fast olt <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> vec1, vector vec2) { + vec1 = atan2(vec1, vec2); +} + +// CHECK-LABEL: test_fmod +// CHECK: fdiv fast <8 x float> +// CHECK: fsub fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 6, <8 x float> %{{.*}}) ; FAbs(value) +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 22, <8 x float> %{{.*}}) ; Frc(value) + +// CHECK: fsub fast <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: fmul fast <8 x float> +export void test_fmod(inout vector vec1, vector vec2) { + vec1 = fmod(vec1, vec2); +} + +// CHECK-LABEL: test_ldexp +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +// CHECK: fmul fast <8 x float> + +export void test_ldexp(inout vector vec1, vector vec2) { + vec1 = ldexp(vec1, vec2); +} + + +// CHECK-LABEL: test_pow +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> %{{.*}}) ; Log(value) +// CHECK: fmul fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +export void test_pow(inout vector vec1, vector vec2) { + vec1 = pow(vec1, vec2); +} + +// CHECK-LABEL: test_modf +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 29, <8 x float> %{{.*}}) ; Round_z(value) +// CHECK: fsub fast <8 x float> +export void test_modf(inout vector vec1, vector vec2) { + vec1 = modf(vec1, vec2); +} + +// CHECK-LABEL: test_dot +// CHECK: [[el:%.*]] = extractelement <8 x float> +// CHECK: [[mul:%.*]] = fmul fast float [[el]] +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +export void test_dot(inout vector vec1, vector vec2) { + vec1 = dot(vec1, vec2); +} + +// CHECK-LABEL: test_any +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +export void test_any(vector vec1, inout vector bvec) { + bvec &= any(vec1); +} + +// CHECK-LABEL: test_all +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +export void test_all(vector vec1, inout vector bvec) { + bvec &= all(vec1); +} + +// CHECK-LABEL: test_WaveMatch +// call {{.*}} 
@dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +export uint4 test_WaveMatch(vector bvec) { + return WaveMatch(bvec); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl new file mode 100644 index 0000000000..02cad5b894 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled binary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + + // Test simple matching type overloads. 
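// FUNC and OP are injected by the RUN lines above (max pairs with opcode 35, FMax;
// min with 36, FMin). Each call below should compile to a single dx.op.binary using
// the full vector overload; the CHECK-NOTs guard against any fallback to per-element
// extract/insert scalarization.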
+ + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) + vector hRes = FUNC(hVec1, hVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) + vector fRes = FUNC(fVec1, fVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) + vector dRes = FUNC(dVec1, dVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl new file mode 100644 index 0000000000..994246b753 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled binary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. 
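// Storing the macro-defined OP and UOP constants to the buffer gives FileCheck a
// concrete i32 to bind as [[OP]] and [[UOP]], so one test body covers both RUN
// configurations: the signed overloads should select [[OP]] (37 IMax / 38 IMin) and
// the unsigned overloads [[UOP]] (39 UMax / 40 UMin).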
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1024); + vector usVec2 = buf.Load >(1536); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2048); + vector iVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3072); + vector uiVec2 = buf.Load >(3584); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4096); + vector lVec2 = buf.Load >(4608); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // 
CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5120); + vector ulVec2 = buf.Load >(5632); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) + vector sRes = FUNC(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) + vector usRes = FUNC(usVec1, usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) + vector iRes = FUNC(iVec1, iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) + vector uiRes = FUNC(uiVec1, uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) + vector lRes = FUNC(lVec1, lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) + vector ulRes = FUNC(ulVec1, ulVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..6ebb511b00 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -0,0 +1,77 @@ +// The binary part of some of these is all just a vector math ops with as many unary dxops as elements. +// These will have apparent mismatches between the ARITY define and the check prefix. 
+ +// RUN: %dxc -DFUNC=f16tof32 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=f32tof16 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=isfinite -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isinf -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isnan -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossDiagonal -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=WaveActiveBitAnd -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitOr -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitXor -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMin -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMax -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitAnd -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitOr -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitXor -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixProduct -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixSum -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneFirst -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveAllEqual -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE + +#ifndef TYPE +#define TYPE float +#endif + +#if ARITY == 1 +#define CALLARGS(x,y,z) x +#elif ARITY == 2 +#define CALLARGS(x,y,z) x, y +#elif ARITY == 3 +#define CALLARGS(x,y,z) x, y, z +// ARITY 4 is used for 1 vec + scalar +#elif ARITY == 4 +#define CALLARGS(x,y,z) x, i +// ARITY 5 is used for 1 vec + uint4 mask for wavemultiprefix* +#elif ARITY == 5 +#define CALLARGS(x,y,z) x, m +#endif + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + 
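// ARITY only controls how CALLARGS assembles the call (1-3 vector arguments, 4 for a
// vector plus the scalar lane index i, 5 for a vector plus the uint4 mask m used by
// the WaveMultiPrefix* functions); the check prefix independently selects which dx.op
// family [[DXOP]] is bound to. Each RUN binds [[DXOP]] once and the seven generic
// CHECK lines that follow then require eight matching calls in total, which lines up
// with the eight-element vectors the test loads and suggests these intrinsics are
// still expanded one element at a time rather than taking the whole vector.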
+float4 main(uint i : SV_PrimitiveID, uint4 m : M) : SV_Target { + vector arg1 = rbuf.Load< vector >(i++*32); + vector arg2 = rbuf.Load< vector >(i++*32); + vector arg3 = rbuf.Load< vector >(i++*32); + + // UNARY: call {{.*}} [[DXOP:@dx.op.unary]] + // BINARY: call {{.*}} [[DXOP:@dx.op.binary]] + // TERTIARY: call {{.*}} [[DXOP:@dx.op.tertiary]] + // LEGACY: call {{.*}} [[DXOP:@dx.op.legacy]] + // SPECFLT: call {{.*}} [[DXOP:@dx.op.isSpecialFloat]] + // QUAD: call {{.*}} [[DXOP:@dx.op.quad]] + // WAVE: call {{.*}} [[DXOP:@dx.op.wave]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + + vector ret = FUNC(CALLARGS(arg1, arg2, arg3)); + return float4(ret[0] + ret[1], ret[2] + ret[3], ret[4] + ret[5], ret[6] + ret[7]); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl new file mode 100644 index 0000000000..e32ebc1db2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled ternary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +// Given that all we have at the moment are fmad and fma and the latter only takes doubles, +// fma is tacked on as an additional check. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.tertiary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]], <[[NUM]] x half> [[hvec3]]) + vector hRes = FUNC(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.tertiary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]], <[[NUM]] x float> [[fvec3]]) + vector fRes = FUNC(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes = FUNC(dVec1, dVec2, dVec3); + + // Tacked on fma() check since it only takes doubles. 
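// fma() has its own DXIL opcode (47, checked literally below) with only a double
// overload, so it is exercised here against the same double vectors instead of
// getting its own parameterized RUN line.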
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 47, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes2 = fma(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + buf.Store >(5120, dRes2); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl new file mode 100644 index 0000000000..50f98715e4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl @@ -0,0 +1,131 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled tertiary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + vector sVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1025); + vector usVec2 = buf.Load >(1536); + vector usVec3 = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2049); + vector iVec2 = buf.Load >(2560); + vector iVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3073); + vector uiVec2 = buf.Load >(3584); + vector uiVec3 = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4097); + vector lVec2 = buf.Load >(4608); + vector lVec3 = buf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5121); + vector ulVec2 = buf.Load >(5632); + vector ulVec3 = buf.Load >(6144); + + // Test simple matching type overloads. 
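// With -DFUNC=mad, [[OP]] (48) should be chosen for the signed overloads and
// [[UOP]] (49) for the unsigned ones, presumably the IMad and UMad opcodes, each
// emitted as a single dx.op.tertiary call on the full vector.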
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]) + vector sRes = FUNC(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]], <[[NUM]] x i16> [[usvec3]]) + vector usRes = FUNC(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]], <[[NUM]] x i32> [[ivec3]]) + vector iRes = FUNC(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]], <[[NUM]] x i32> [[uivec3]]) + vector uiRes = FUNC(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]], <[[NUM]] x i64> [[lvec3]]) + vector lRes = FUNC(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]], <[[NUM]] x i64> [[ulvec3]]) + vector ulRes = FUNC(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl new file mode 100644 index 0000000000..91ab631a7e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types 
-DFUNC=cosh -DOP=18 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec = buf.Load >(1024); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec]]) + vector hRes = FUNC(hVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]]) + vector fRes = FUNC(fVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(1024, fRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl new file mode 100644 index 0000000000..ef0b250745 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // Capture opcode number. 
+  // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]]
+  buf.Store(999, OP);
+
+  // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0
+  // CHECK: [[svec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0
+  vector<int16_t, NUM> sVec = buf.Load<vector<int16_t, NUM> >(0);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024
+  // CHECK: [[usvec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0
+  vector<uint16_t, NUM> usVec = buf.Load<vector<uint16_t, NUM> >(1024);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048
+  // CHECK: [[ivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  vector<int, NUM> iVec = buf.Load<vector<int, NUM> >(2048);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072
+  // CHECK: [[uivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  vector<uint, NUM> uiVec = buf.Load<vector<uint, NUM> >(3072);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096
+  // CHECK: [[lvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0
+  vector<int64_t, NUM> lVec = buf.Load<vector<int64_t, NUM> >(4096);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120
+  // CHECK: [[ulvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0
+  vector<uint64_t, NUM> ulVec = buf.Load<vector<uint64_t, NUM> >(5120);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec]])
+  vector<int16_t, NUM> sRes = FUNC(sVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[usvec]])
+  vector<uint16_t, NUM> usRes = FUNC(usVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec]])
+  vector<int, NUM> iRes = FUNC(iVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[uivec]])
+  vector<uint, NUM> uiRes = FUNC(uiVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec]])
+  vector<int64_t, NUM> lRes = FUNC(lVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[ulvec]])
+  vector<uint64_t, NUM> ulRes = FUNC(ulVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  buf.Store<vector<int16_t, NUM> >(0, sRes);
+  buf.Store<vector<uint16_t, NUM> >(1024, usRes);
+  buf.Store<vector<int, NUM> >(2048, iRes);
+  buf.Store<vector<uint, NUM> >(3072, uiRes);
+  buf.Store<vector<int64_t, NUM> >(4096, lRes);
+  buf.Store<vector<uint64_t, NUM> >(5120, ulRes);
+}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl
new file mode 100644
index 0000000000..11d705305d
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl
@@ -0,0 +1,186 @@
+// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s
+
+// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll.
+// Some targetted filecheck testing as an incidental. + +RWStructuredBuffer > hBuf; +RWStructuredBuffer > fBuf; +RWStructuredBuffer > dBuf; + +RWStructuredBuffer > bBuf; +RWStructuredBuffer > uBuf; +RWStructuredBuffer > lBuf; + +[numthreads(8,1,1)] +void main() { + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + vector fVec1 = fBuf[11]; + vector fVec2 = fBuf[12]; + vector fVec3 = fBuf[13]; + + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + vector hVec1 = hBuf[14]; + vector hVec2 = hBuf[15]; + vector hVec3 = hBuf[16]; + + // CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> hRes = step(hVec1, hVec2); + + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <13 x half> [[tmp]], [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
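+  // (These lower to plain LLVM vector arithmetic and bit manipulation rather
+  // than dx.op.* calls.)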
+ + // CHECK: fmul fast <13 x float> [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], [[add]], [[shr]] to <13 x float> + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + vector uVec1 = uBuf[17]; + vector uVec2 = uBuf[18]; + + vector signs = 1; + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> + signs *= sign(uVec2); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + vector lVec1 = lBuf[19]; + vector lVec2 = lBuf[20]; + + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> + // CHECK: [[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + vector uRes = signs; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + vector bVec1 = bBuf[21]; + vector bVec2 = bBuf[22]; + vector bVec3 = bBuf[23]; + + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] + uRes += or(bVec1, bVec2); + + // CHECK: [[bvec3:%.*]] = icmp ne <13 
x i32> [[vec3]], zeroinitializer + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] + uRes += and(bVec2, bVec3); + + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] + vector lRes = select(bVec3, lVec1, lVec2); + + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] + // CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) + fRes += dot(fVec1, fVec2); + + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) + fRes += atan(fVec1); + + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) + uRes += min(uVec1, uVec2); + + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) + fRes += mad(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + vector dVec1 = dBuf[24]; + vector dVec2 = dBuf[25]; + vector dVec3 = dBuf[26]; + + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) + vector dRes = fma(dVec1, dVec2, dVec3); + + hBuf[0] = hRes; + fBuf[0] = fRes; + dBuf[0] = dRes; + uBuf[0] = uRes; + lBuf[0] = lRes; +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll 
b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll new file mode 100644 index 0000000000..8f9dcbbdbc --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll @@ -0,0 +1,434 @@ +; RUN: %dxopt %s -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <7 x half> } +%"class.RWStructuredBuffer >" = type { <7 x float> } +%"class.RWStructuredBuffer >" = type { <7 x double> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i64> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" = external global %"class.RWStructuredBuffer >", align 2 +@"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 + +; CHECK-LABEL: define void @main() +define void @main() #0 { +bb: + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + ; CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + ; CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + ; CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + + %exp = alloca <7 x float>, align 4 + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:23 col:30 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp) ; line:23 col:30 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:23 col:30 + %tmp3 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 11) ; line:23 col:30 + %tmp4 = load <7 x float>, <7 x float>* %tmp3 ; line:23 col:30 + %tmp5 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:24 col:30 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp5) ; line:24 col:30 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:24 col:30 + %tmp8 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp7, i32 12) ; line:24 col:30 + %tmp9 = load <7 x float>, <7 x float>* %tmp8 ; line:24 col:30 + %tmp10 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:25 col:30 + %tmp11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp10) ; line:25 col:30 + %tmp12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:25 col:30 + %tmp13 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp12, i32 13) ; line:25 col:30 + %tmp14 = load <7 x float>, <7 x float>* %tmp13 ; line:25 col:30 + + ; Clamp operation. + ; CHECK: [[max:%.*]] = call <7 x float> @dx.op.binary.v7f32(i32 35, <7 x float> [[fvec1]], <7 x float> [[fvec2]]) + ; CHECK: call <7 x float> @dx.op.binary.v7f32(i32 36, <7 x float> [[max]], <7 x float> [[fvec3]]) + %tmp15 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 119, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:29 col:29 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + ; CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + ; CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + ; CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + %tmp16 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:37 col:34 + %tmp17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp16) ; line:37 col:34 + %tmp18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp17, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:37 col:34 + %tmp19 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp18, i32 14) ; line:37 col:34 + %tmp20 = load <7 x half>, <7 x half>* %tmp19 ; line:37 col:34 + %tmp21 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:38 col:34 + %tmp22 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp21) ; line:38 col:34 + %tmp23 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp22, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:38 col:34 + %tmp24 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp23, i32 15) ; line:38 col:34 + %tmp25 = load <7 x half>, <7 x half>* %tmp24 ; line:38 col:34 + %tmp26 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:39 col:34 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp26) ; line:39 col:34 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:39 col:34 + %tmp29 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp28, i32 16) ; line:39 col:34 + %tmp30 = load <7 x half>, <7 x half>* %tmp29 ; line:39 col:34 + + ; Step operation. + ; CHECK: [[cmp:%.*]] = fcmp fast olt <7 x half> [[hvec2]], [[hvec1]] + ; CHECK: select <7 x i1> [[cmp]], <7 x half> zeroinitializer, <7 x half> + %tmp31 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32 192, <7 x half> %tmp20, <7 x half> %tmp25) ; line:43 col:33 + + ; Exp operation. + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> , [[fvec1]] + ; CHECK call <7 x float> @dx.op.unary.v7f32(i32 21, <7 x float> [[mul]]) + %tmp32 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 139, <7 x float> %tmp4) ; line:47 col:11 + %tmp33 = fadd <7 x float> %tmp15, %tmp32 ; line:47 col:8 + + ; Log operation. + ; CHECK: [[log:%.*]] = call <7 x half> @dx.op.unary.v7f16(i32 23, <7 x half> [[hvec1]]) + ; CHECK: fmul fast <7 x half> , [[log]] + %tmp34 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32 159, <7 x half> %tmp20) ; line:51 col:11 + %tmp35 = fadd <7 x half> %tmp31, %tmp34 ; line:51 col:8 + + ; Smoothstep operation. + ; CHECK: [[sub1:%.*]] = fsub fast <7 x float> [[fvec2]], [[fvec1]] + ; CHECK: [[sub2:%.*]] = fsub fast <7 x float> [[fvec3]], [[fvec1]] + ; CHECK: [[div:%.*]] = fdiv fast <7 x float> [[sub2]], [[sub1]] + ; CHECK: [[sat:%.*]] = call <7 x float> @dx.op.unary.v7f32(i32 7, <7 x float> [[div]]) + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], + ; CHECK: [[sub:%.*]] = fsub fast <7 x float> , [[mul]] + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], [[sub]] + ; CHECK: fmul fast <7 x float> %Saturate, [[mul]] + %tmp36 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 189, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:61 col:11 + %tmp37 = fadd <7 x float> %tmp33, %tmp36 ; line:61 col:8 + + ; Radians operation. + ; CHECK: fmul fast <7 x float> , [[fvec3]] + %tmp38 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 176, <7 x float> %tmp14) ; line:66 col:11 + %tmp39 = fadd <7 x float> %tmp37, %tmp38 ; line:66 col:8 + store <7 x float> %tmp14, <7 x float>* %exp, align 4 ; line:77 col:22 + + ; Frexp operation. 
+ ; CHECK: [[cmp:%.*]] = fcmp fast une <7 x float> [[fvec1]], zeroinitializer + ; CHECK: [[ext:%.*]] = sext <7 x i1> [[cmp]] to <7 x i32> + ; CHECK: [[bct:%.*]] = bitcast <7 x float> [[fvec1]] to <7 x i32> + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[add:%.*]] = add <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[add]], [[ext]] + ; CHECK: [[shr:%.*]] = ashr <7 x i32> [[and]], + ; CHECK: [[i2f:%.*]] = sitofp <7 x i32> [[shr]] to <7 x float> + ; CHECK: store <7 x float> [[i2f]], <7 x float>* %exp + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[or:%.*]] = or <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[or]], [[ext]] + ; CHECK: bitcast <7 x i32> [[and]] to <7 x float> + %tmp41 = call <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32 150, <7 x float> %tmp4, <7 x float>* %exp) ; line:78 col:11 + %tmp42 = fadd <7 x float> %tmp39, %tmp41 ; line:78 col:8 + %tmp43 = load <7 x float>, <7 x float>* %exp, align 4 ; line:79 col:11 + %tmp44 = fadd <7 x float> %tmp42, %tmp43 ; line:79 col:8 + + ; Lerp operation. + ; CHECK: [[sub:%.*]] = fsub fast <7 x half> [[hvec3]], [[hvec2]] + ; CHECK: fmul fast <7 x half> [[hvec1]], [[sub]] + %tmp45 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32 157, <7 x half> %tmp25, <7 x half> %tmp30, <7 x half> %tmp20) ; line:83 col:11 + %tmp46 = fadd <7 x half> %tmp35, %tmp45 ; line:83 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + ; CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + ; CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + %tmp47 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:90 col:29 + %tmp48 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp47) ; line:90 col:29 + %tmp49 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp48, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:90 col:29 + %tmp50 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp49, i32 17) ; line:90 col:29 + %tmp51 = load <7 x i32>, <7 x i32>* %tmp50 ; line:90 col:29 + %tmp52 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:91 col:29 + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp52) ; line:91 col:29 + %tmp54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:91 col:29 + %tmp55 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 18) ; line:91 col:29 + %tmp56 = load <7 x i32>, <7 x i32>* 
%tmp55 ; line:91 col:29 + + ; Unsigned int sign operation. + ; CHECK: [[cmp:%.*]] = icmp ne <7 x i32> [[uvec2]], zeroinitializer + ; CHECK: zext <7 x i1> [[cmp]] to <7 x i32> + %tmp57 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32 355, <7 x i32> %tmp56) ; line:96 col:12 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + ; CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + ; CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + %tmp58 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:102 col:32 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp58) ; line:102 col:32 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:102 col:32 + %tmp61 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp60, i32 19) ; line:102 col:32 + %tmp62 = load <7 x i64>, <7 x i64>* %tmp61 ; line:102 col:32 + %tmp63 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:103 col:32 + %tmp64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp63) ; line:103 col:32 + %tmp65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp64, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:103 col:32 + %tmp66 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp65, i32 20) ; line:103 col:32 + %tmp67 = load <7 x i64>, <7 x i64>* %tmp66 ; line:103 col:32 + + ; Signed int sign operation. 
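+  ; For signed integers, sign(x) is computed as zext(0 < x) - zext(x < 0),
+  ; giving -1, 0, or 1 per lane.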
+ ; CHECK: [[lt1:%.*]] = icmp slt <7 x i64> zeroinitializer, [[lvec2]] + ; CHECK: [[lt2:%.*]] = icmp slt <7 x i64> [[lvec2]], zeroinitializer + ; CHECK: [[ilt1:%.*]] = zext <7 x i1> [[lt1]] to <7 x i32> + ; CHECK: [[ilt2:%.*]] = zext <7 x i1> [[lt2]] to <7 x i32> + ; CHECK: sub <7 x i32> [[ilt1]], [[ilt2]] + %tmp68 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32 185, <7 x i64> %tmp67) ; line:110 col:12 + %tmp69 = mul <7 x i32> %tmp57, %tmp68 ; line:110 col:9 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec1:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec2:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec3:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + %tmp70 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:126 col:29 + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp70) ; line:126 col:29 + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:126 col:29 + %tmp73 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 21) ; line:126 col:29 + %tmp74 = load <7 x i32>, <7 x i32>* %tmp73 ; line:126 col:29 + %tmp75 = icmp ne <7 x i32> %tmp74, zeroinitializer ; line:126 col:29 + %tmp76 = zext <7 x i1> %tmp75 to <7 x i32> ; line:126 col:21 + %tmp77 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:127 col:29 + %tmp78 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp77) ; line:127 col:29 + %tmp79 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp78, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:127 col:29 + %tmp80 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp79, i32 22) ; line:127 col:29 + %tmp81 = load <7 x i32>, <7 x i32>* %tmp80 ; line:127 col:29 + %tmp82 = icmp ne <7 x i32> %tmp81, zeroinitializer ; line:127 col:29 + %tmp83 = zext <7 x i1> %tmp82 to <7 x i32> ; line:127 col:21 + %tmp84 = load %"class.RWStructuredBuffer >", 
%"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:128 col:29 + %tmp85 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp84) ; line:128 col:29 + %tmp86 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp85, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:128 col:29 + %tmp87 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp86, i32 23) ; line:128 col:29 + %tmp88 = load <7 x i32>, <7 x i32>* %tmp87 ; line:128 col:29 + %tmp89 = icmp ne <7 x i32> %tmp88, zeroinitializer ; line:128 col:29 + %tmp90 = zext <7 x i1> %tmp89 to <7 x i32> ; line:128 col:21 + + + ; Or() operation. + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: [[bvec1:%.*]] = icmp ne <7 x i32> [[vec1]], zeroinitializer + ; CHECK: or <7 x i1> [[bvec1]], [[bvec2]] + %tmp91 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:133 col:21 + %tmp92 = icmp ne <7 x i32> %tmp76, zeroinitializer ; line:133 col:14 + %tmp93 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 169, <7 x i1> %tmp92, <7 x i1> %tmp91) ; line:133 col:11 + %tmp94 = zext <7 x i1> %tmp93 to <7 x i32> ; line:133 col:11 + %tmp95 = add <7 x i32> %tmp69, %tmp94 ; line:133 col:8 + + ; And() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: and <7 x i1> [[bvec2]], [[bvec3]] + %tmp96 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:137 col:22 + %tmp97 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:137 col:15 + %tmp98 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 106, <7 x i1> %tmp97, <7 x i1> %tmp96) ; line:137 col:11 + %tmp99 = zext <7 x i1> %tmp98 to <7 x i32> ; line:137 col:11 + %tmp100 = add <7 x i32> %tmp95, %tmp99 ; line:137 col:8 + + ; Select() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: select <7 x i1> [[bvec3]], <7 x i64> [[lvec1]], <7 x i64> [[lvec2]] + %tmp101 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:140 col:38 + %tmp102 = call <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32 184, <7 x i1> %tmp101, <7 x i64> %tmp62, <7 x i64> %tmp67) ; line:140 col:31 + %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11 + + ; Dot operation. 
+ ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0 + ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]] + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1 + ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2 + ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3 + ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4 + ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5 + ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6 + ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]]) + %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11 + %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11 + %tmp106 = fadd <7 x float> %tmp44, %tmp105 ; line:152 col:8 + + ; Atan operation. + ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 17, <7 x float> [[fvec1]]) + %tmp107 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 116, <7 x float> %tmp4) ; line:155 col:11 + %tmp108 = fadd <7 x float> %tmp106, %tmp107 ; line:155 col:8 + + ; Min operation. + ; CHECK: call <7 x i32> @dx.op.binary.v7i32(i32 40, <7 x i32> [[uvec1]], <7 x i32> [[uvec2]]) + %tmp109 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32 353, <7 x i32> %tmp51, <7 x i32> %tmp56) ; line:158 col:11 + %tmp110 = add <7 x i32> %tmp100, %tmp109 ; line:158 col:8 + + ; Mad operation. 
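+  ; In contrast to dot, mad remains a single FMad call with vector operands.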
+ ; CHECK: call <7 x float> @dx.op.tertiary.v7f32(i32 46, <7 x float> [[fvec1]], <7 x float> [[fvec2]], <7 x float> [[fvec3]]) + %tmp111 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 162, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:161 col:11 + %tmp112 = fadd <7 x float> %tmp108, %tmp111 ; line:161 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + ; CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + ; CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + ; CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:169 col:31 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:169 col:31 + %tmp115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:169 col:31 + %tmp116 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 24) ; line:169 col:31 + %tmp117 = load <7 x double>, <7 x double>* %tmp116 ; line:169 col:31 + %tmp118 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:170 col:31 + %tmp119 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp118) ; line:170 col:31 + %tmp120 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp119, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:170 col:31 + %tmp121 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp120, i32 25) ; line:170 col:31 + %tmp122 = load <7 x double>, <7 x double>* %tmp121 ; line:170 col:31 + %tmp123 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:171 col:31 + %tmp124 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp123) ; line:171 col:31 + %tmp125 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp124, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:171 col:31 + %tmp126 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp125, 
i32 26) ; line:171 col:31 + %tmp127 = load <7 x double>, <7 x double>* %tmp126 ; line:171 col:31 + + ; FMA operation. + ; CHECK: call <7 x double> @dx.op.tertiary.v7f64(i32 47, <7 x double> [[dvec1]], <7 x double> [[dvec2]], <7 x double> [[dvec3]]) + %tmp128 = call <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32 147, <7 x double> %tmp117, <7 x double> %tmp122, <7 x double> %tmp127) ; line:174 col:30 + %tmp129 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:176 col:3 + %tmp130 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp129) ; line:176 col:3 + %tmp131 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp130, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:176 col:3 + %tmp132 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp131, i32 0) ; line:176 col:3 + store <7 x half> %tmp46, <7 x half>* %tmp132 ; line:176 col:11 + %tmp133 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:177 col:3 + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp133) ; line:177 col:3 + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp134, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:177 col:3 + %tmp136 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp135, i32 0) ; line:177 col:3 + store <7 x float> %tmp112, <7 x float>* %tmp136 ; line:177 col:11 + %tmp137 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:178 col:3 + %tmp138 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp137) ; line:178 col:3 + %tmp139 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp138, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:178 col:3 + %tmp140 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp139, i32 0) ; line:178 col:3 + store <7 x double> %tmp128, <7 x double>* %tmp140 ; line:178 col:11 + %tmp141 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:179 col:3 + %tmp142 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp141) ; line:179 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, 
%dx.types.Handle %tmp142, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:179 col:3 + %tmp144 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp143, i32 0) ; line:179 col:3 + store <7 x i32> %tmp110, <7 x i32>* %tmp144 ; line:179 col:11 + %tmp145 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:180 col:3 + %tmp146 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp145) ; line:180 col:3 + %tmp147 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp146, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:180 col:3 + %tmp148 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp147, i32 0) ; line:180 col:3 + store <7 x i64> %tmp102, <7 x i64>* %tmp148 ; line:180 col:11 + ret void ; line:181 col:1 +} + +declare <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>, <7 x float>) #1 +declare <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>) #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32, <7 x float>) #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32, <7 x half>) #1 +declare <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32, <7 x float>, <7 x float>*) #0 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>, <7 x half>) #1 +declare <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32, <7 x i32>) #1 +declare <7 x i64>* 
@"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32, <7 x i64>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32, <7 x i1>, <7 x i1>) #1 +declare <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32, <7 x i1>, <7 x i64>, <7 x i64>) #1 +declare float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>) #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32, <7 x i32>, <7 x i32>) #1 +declare <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32, <7 x double>, <7 x double>, <7 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !36} +!dx.entryPoints = !{!40} +!dx.fnprops = !{!52} +!dx.options = !{!53, !54} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"cs", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer >" undef, !6, %"class.RWStructuredBuffer >" undef, !11, %"class.RWStructuredBuffer >" undef, !16, %"class.RWStructuredBuffer >" undef, !21, %"class.RWStructuredBuffer >" undef, !26, %"class.RWStructuredBuffer >" undef, !31} +!6 = !{i32 14, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 8, i32 13, i32 7} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, <7 x half> undef} +!11 = !{i32 28, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 7} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, <7 x float> undef} +!16 = !{i32 56, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 10, i32 13, i32 7} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, <7 x double> undef} +!21 = !{i32 28, !22, !23} +!22 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 1, i32 13, i32 7} +!23 = !{i32 0, !24} +!24 = !{!25} +!25 = !{i32 0, <7 x i1> undef} +!26 = !{i32 28, !27, !28} +!27 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5, i32 13, i32 7} +!28 = !{i32 0, !29} +!29 = !{!30} +!30 = !{i32 0, <7 x i32> undef} +!31 = !{i32 56, !32, !33} +!32 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 6, i32 13, 
i32 7} +!33 = !{i32 0, !34} +!34 = !{!35} +!35 = !{i32 0, <7 x i64> undef} +!36 = !{i32 1, void ()* @main, !37} +!37 = !{!38} +!38 = !{i32 1, !39, !39} +!39 = !{} +!40 = !{void ()* @main, !"main", null, !41, null} +!41 = !{null, !42, null, null} +!42 = !{!43, !45, !47, !49, !50, !51} +!43 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A", !"hBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !44} +!44 = !{i32 1, i32 14} +!45 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A", !"fBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!46 = !{i32 1, i32 28} +!47 = !{i32 2, %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A", !"dBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!48 = !{i32 1, i32 56} +!49 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A", !"bBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!50 = !{i32 4, %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A", !"uBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!51 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A", !"lBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!52 = !{void ()* @main, i32 5, i32 8, i32 1, i32 1} +!53 = !{i32 0} +!54 = !{i32 -1} +!59 = !{!60, !60, i64 0} +!60 = !{!"omnipotent char", !61, i64 0} +!61 = !{!"Simple C/C++ TBAA"} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 0008b752b1..a6cc52df1a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1503,7 +1503,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1537,7 +1537,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1554,7 +1554,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the reverse bit pattern of the input value", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1601,7 +1601,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1619,7 +1619,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1674,7 +1674,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "hfd", + "hfd<", "rn", [ db_dxil_param( @@ -1691,7 +1691,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -1715,7 +1715,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs an integral " + i, - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "the operation result"), @@ -2608,7 +2608,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2626,7 +2626,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2644,7 
+2644,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2662,7 +2662,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( From 90bfb669fd98e35993cc11bf3f0ae04b2194196e Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 10 Apr 2025 18:21:09 +0200 Subject: [PATCH 76/88] [SER] 'reordercoherent' HLSL attribute and DXIL encoding (#7250) Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md 'reordercoherent' encoding hlsl-specs PR: https://github.com/microsoft/hlsl-specs/pull/453 DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilMetadataHelper.h | 1 + include/dxc/DXIL/DxilResource.h | 3 + include/dxc/DXIL/DxilResourceProperties.h | 3 +- .../dxc/DxilContainer/RDAT_LibraryTypes.inl | 1 + lib/DXIL/DxilMetadataHelper.cpp | 10 ++ lib/DXIL/DxilResource.cpp | 8 +- lib/DXIL/DxilResourceProperties.cpp | 3 + lib/DxilContainer/DxilContainerAssembler.cpp | 3 + lib/DxilPIXPasses/PixPassHelpers.cpp | 1 + lib/HLSL/DxilCondenseResources.cpp | 4 +- lib/HLSL/DxilGenerationPass.cpp | 1 + lib/HLSL/DxilPatchShaderRecordBindings.cpp | 1 + lib/HLSL/HLModule.cpp | 1 + tools/clang/include/clang/AST/HlslTypes.h | 1 + tools/clang/include/clang/AST/Type.h | 3 +- tools/clang/include/clang/Basic/Attr.td | 6 ++ .../clang/Basic/DiagnosticSemaKinds.td | 20 +++- .../clang/include/clang/Basic/TokenKinds.def | 1 + tools/clang/include/clang/Sema/Sema.h | 5 +- tools/clang/include/clang/Sema/SemaHLSL.h | 3 +- tools/clang/lib/AST/HlslTypes.cpp | 12 +++ tools/clang/lib/AST/Type.cpp | 4 +- tools/clang/lib/AST/TypePrinter.cpp | 3 + tools/clang/lib/CodeGen/CGHLSLMS.cpp | 51 +++++---- .../lib/CodeGen/CGHLSLMSFinishCodeGen.cpp | 9 +- tools/clang/lib/CodeGen/CGHLSLMSHelper.h | 3 +- tools/clang/lib/CodeGen/CGHLSLRuntime.h | 2 +- tools/clang/lib/CodeGen/CGStmt.cpp | 4 +- tools/clang/lib/Parse/ParseDecl.cpp | 3 + tools/clang/lib/Parse/ParseExpr.cpp | 2 + tools/clang/lib/Parse/ParseStmt.cpp | 1 + tools/clang/lib/Parse/ParseTentative.cpp | 1 + tools/clang/lib/Sema/SemaChecking.cpp | 6 +- tools/clang/lib/Sema/SemaDecl.cpp | 7 +- tools/clang/lib/Sema/SemaDeclAttr.cpp | 11 ++ tools/clang/lib/Sema/SemaHLSL.cpp | 82 +++++++++++--- tools/clang/lib/Sema/SemaStmt.cpp | 2 +- tools/clang/lib/Sema/SemaType.cpp | 31 ++++-- .../attributes/reordercoherent_for_arg.hlsl | 19 ++++ .../hlsl/attributes/reordercoherent_uav.hlsl | 17 +++ .../attributes/reordercoherent_uav_array.hlsl | 16 +++ .../attributes/reordercoherent_ast.hlsl | 17 +++ ...dercoherent-globallycoherent-mismatch.hlsl | 96 +++++++++++++++++ .../SemaHLSL/reordercoherent-implied.hlsl | 41 +++++++ .../SemaHLSL/reordercoherent-mismatch.hlsl | 101 ++++++++++++++++++ .../SemaHLSL/reordercoherent-type-errors.hlsl | 26 +++++ .../tools/dxcompiler/dxcdisassembler.cpp | 7 +- .../unittests/HLSL/DxilContainerTest.cpp | 8 +- 48 files changed, 585 insertions(+), 76 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl create mode 100644 tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl create mode 100644 
tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl create mode 100644 tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl create mode 100644 tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h index 9df155e6e7..e17db016d8 100644 --- a/include/dxc/DXIL/DxilMetadataHelper.h +++ b/include/dxc/DXIL/DxilMetadataHelper.h @@ -233,6 +233,7 @@ class DxilMDHelper { static const unsigned kDxilStructuredBufferElementStrideTag = 1; static const unsigned kDxilSamplerFeedbackKindTag = 2; static const unsigned kDxilAtomic64UseTag = 3; + static const unsigned kDxilReorderCoherentTag = 4; // Type system. static const char kDxilTypeSystemMDName[]; diff --git a/include/dxc/DXIL/DxilResource.h b/include/dxc/DXIL/DxilResource.h index 49db65caed..dcf70333da 100644 --- a/include/dxc/DXIL/DxilResource.h +++ b/include/dxc/DXIL/DxilResource.h @@ -63,6 +63,8 @@ class DxilResource : public DxilResourceBase { bool IsGloballyCoherent() const; void SetGloballyCoherent(bool b); + bool IsReorderCoherent() const; + void SetReorderCoherent(bool b); bool HasCounter() const; void SetHasCounter(bool b); @@ -97,6 +99,7 @@ class DxilResource : public DxilResourceBase { CompType m_CompType; DXIL::SamplerFeedbackType m_SamplerFeedbackType; bool m_bGloballyCoherent; + bool m_bReorderCoherent; bool m_bHasCounter; bool m_bROV; bool m_bHasAtomic64Use; diff --git a/include/dxc/DXIL/DxilResourceProperties.h b/include/dxc/DXIL/DxilResourceProperties.h index 21a705f077..2f4ff58969 100644 --- a/include/dxc/DXIL/DxilResourceProperties.h +++ b/include/dxc/DXIL/DxilResourceProperties.h @@ -47,7 +47,8 @@ struct DxilResourceProperties { uint8_t SamplerCmpOrHasCounter : 1; // BYTE 2 - uint8_t Reserved2; + uint8_t IsReorderCoherent : 1; + uint8_t Reserved2 : 7; // BYTE 3 uint8_t Reserved3; diff --git a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl index 132d272a8e..4b58b406c2 100644 --- a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl +++ b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl @@ -22,6 +22,7 @@ RDAT_ENUM_START(DxilResourceFlag, uint32_t) RDAT_ENUM_VALUE(UAVRasterizerOrderedView, 1 << 2) RDAT_ENUM_VALUE(DynamicIndexing, 1 << 3) RDAT_ENUM_VALUE(Atomics64Use, 1 << 4) + RDAT_ENUM_VALUE(UAVReorderCoherent, 1 << 5) RDAT_ENUM_END() RDAT_ENUM_START(DxilShaderStageFlags, uint32_t) diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp index 19d199ee29..c1282a980a 100644 --- a/lib/DXIL/DxilMetadataHelper.cpp +++ b/lib/DXIL/DxilMetadataHelper.cpp @@ -3110,6 +3110,13 @@ void DxilExtraPropertyHelper::EmitUAVProperties( DxilMDHelper::kDxilAtomic64UseTag, m_Ctx)); MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD((unsigned)true, m_Ctx)); } + // Whether resource is reordercoherent. 
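+  // Encoded as an extended-property pair (kDxilReorderCoherentTag, i1 true).
+  // The pair is only emitted when targeting DXIL 1.9 or later, so metadata
+  // produced for earlier targets is unchanged.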
+ if (DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 9) >= 0 && + UAV.IsReorderCoherent()) { + MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD( + DxilMDHelper::kDxilReorderCoherentTag, m_Ctx)); + MDVals.emplace_back(DxilMDHelper::BoolToConstMD(true, m_Ctx)); + } } void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, @@ -3147,6 +3154,9 @@ void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, case DxilMDHelper::kDxilAtomic64UseTag: UAV.SetHasAtomic64Use(DxilMDHelper::ConstMDToBool(MDO)); break; + case DxilMDHelper::kDxilReorderCoherentTag: + UAV.SetReorderCoherent(DxilMDHelper::ConstMDToBool(MDO)); + break; default: DXASSERT(false, "Unknown resource record tag"); m_bExtraMetadata = true; diff --git a/lib/DXIL/DxilResource.cpp b/lib/DXIL/DxilResource.cpp index 3ab71030bb..0e6f1df877 100644 --- a/lib/DXIL/DxilResource.cpp +++ b/lib/DXIL/DxilResource.cpp @@ -25,8 +25,8 @@ namespace hlsl { DxilResource::DxilResource() : DxilResourceBase(DxilResourceBase::Class::Invalid), m_SampleCount(0), m_ElementStride(0), m_SamplerFeedbackType((DXIL::SamplerFeedbackType)0), - m_bGloballyCoherent(false), m_bHasCounter(false), m_bROV(false), - m_bHasAtomic64Use(false) {} + m_bGloballyCoherent(false), m_bReorderCoherent(false), + m_bHasCounter(false), m_bROV(false), m_bHasAtomic64Use(false) {} CompType DxilResource::GetCompType() const { return m_CompType; } @@ -74,6 +74,10 @@ bool DxilResource::IsGloballyCoherent() const { return m_bGloballyCoherent; } void DxilResource::SetGloballyCoherent(bool b) { m_bGloballyCoherent = b; } +bool DxilResource::IsReorderCoherent() const { return m_bReorderCoherent; } + +void DxilResource::SetReorderCoherent(bool b) { m_bReorderCoherent = b; } + bool DxilResource::HasCounter() const { return m_bHasCounter; } void DxilResource::SetHasCounter(bool b) { m_bHasCounter = b; } diff --git a/lib/DXIL/DxilResourceProperties.cpp b/lib/DXIL/DxilResourceProperties.cpp index 2d1bf95014..54ab24f36e 100644 --- a/lib/DXIL/DxilResourceProperties.cpp +++ b/lib/DXIL/DxilResourceProperties.cpp @@ -190,6 +190,7 @@ DxilResourceProperties loadPropsFromResourceBase(const DxilResourceBase *Res) { RP.Basic.IsUAV = true; RP.Basic.ResourceKind = (uint8_t)Res->GetKind(); RP.Basic.IsGloballyCoherent = UAV->IsGloballyCoherent(); + RP.Basic.IsReorderCoherent = UAV->IsReorderCoherent(); RP.Basic.SamplerCmpOrHasCounter = UAV->HasCounter(); RP.Basic.IsROV = UAV->IsROV(); SetResProperties(*UAV); @@ -234,6 +235,8 @@ DxilResourceProperties tryMergeProps(DxilResourceProperties curProps, prevProps.Basic.IsGloballyCoherent) { curProps.Basic.IsGloballyCoherent = prevProps.Basic.IsGloballyCoherent; } + if (curProps.Basic.IsReorderCoherent != prevProps.Basic.IsReorderCoherent) + curProps.Basic.IsReorderCoherent = prevProps.Basic.IsReorderCoherent; } if (curProps.Basic.ResourceKind == (uint8_t)DXIL::ResourceKind::CBuffer) { diff --git a/lib/DxilContainer/DxilContainerAssembler.cpp b/lib/DxilContainer/DxilContainerAssembler.cpp index f0d7bf6d23..48d8872733 100644 --- a/lib/DxilContainer/DxilContainerAssembler.cpp +++ b/lib/DxilContainer/DxilContainerAssembler.cpp @@ -1057,6 +1057,9 @@ class DxilRDATWriter : public DxilPartWriter { if (pRes->IsGloballyCoherent()) info.Flags |= static_cast(RDAT::DxilResourceFlag::UAVGloballyCoherent); + if (pRes->IsReorderCoherent()) + info.Flags |= + static_cast(RDAT::DxilResourceFlag::UAVReorderCoherent); if (pRes->IsROV()) info.Flags |= static_cast( RDAT::DxilResourceFlag::UAVRasterizerOrderedView); diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp 
b/lib/DxilPIXPasses/PixPassHelpers.cpp index 65d9a660cc..c7c99cf763 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -324,6 +324,7 @@ hlsl::DxilResource *CreateGlobalUAVResource(hlsl::DxilModule &DM, (unsigned int)-2); // This is the reserved-for-tools register space pUAV->SetSampleCount(0); // This is what compiler generates for a raw UAV pUAV->SetGloballyCoherent(false); + pUAV->SetReorderCoherent(false); pUAV->SetHasCounter(false); pUAV->SetCompType( CompType::getInvalid()); // This is what compiler generates for a raw UAV diff --git a/lib/HLSL/DxilCondenseResources.cpp b/lib/HLSL/DxilCondenseResources.cpp index 82d5e14d00..529c203bdc 100644 --- a/lib/HLSL/DxilCondenseResources.cpp +++ b/lib/HLSL/DxilCondenseResources.cpp @@ -2061,7 +2061,8 @@ void DxilLowerCreateHandleForLib::ReplaceResourceUserWithHandle( }; // Search all users for update counter - bool updateAnnotateHandle = res.IsGloballyCoherent(); + bool updateAnnotateHandle = + res.IsGloballyCoherent() || res.IsReorderCoherent(); if (!res.HasCounter()) { for (User *U : handle->users()) { if (IsDxilOp(U, hlsl::OP::OpCode::BufferUpdateCounter)) { @@ -2321,6 +2322,7 @@ void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) { pDest->SetSampleCount(0); pDest->SetElementStride(0); pDest->SetGloballyCoherent(false); + pDest->SetReorderCoherent(false); pDest->SetHasCounter(false); pDest->SetRW(false); pDest->SetROV(false); diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index 7d902a4ed7..c3a6ad7dfc 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -88,6 +88,7 @@ void InitResource(const DxilResource *pSource, DxilResource *pDest) { pDest->SetSampleCount(pSource->GetSampleCount()); pDest->SetElementStride(pSource->GetElementStride()); pDest->SetGloballyCoherent(pSource->IsGloballyCoherent()); + pDest->SetReorderCoherent(pSource->IsReorderCoherent()); pDest->SetHasCounter(pSource->HasCounter()); pDest->SetRW(pSource->IsRW()); pDest->SetROV(pSource->IsROV()); diff --git a/lib/HLSL/DxilPatchShaderRecordBindings.cpp b/lib/HLSL/DxilPatchShaderRecordBindings.cpp index 1873dcbcc4..e07a41a5c0 100644 --- a/lib/HLSL/DxilPatchShaderRecordBindings.cpp +++ b/lib/HLSL/DxilPatchShaderRecordBindings.cpp @@ -341,6 +341,7 @@ unsigned int DxilPatchShaderRecordBindings::AddHandle( if (pHandle) { pHandle->SetGloballyCoherent(false); + pHandle->SetReorderCoherent(false); pHandle->SetHasCounter(false); pHandle->SetCompType(CompType::getF32()); // TODO: Need to handle all types } diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index a67877ef3e..bab6e23a30 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -700,6 +700,7 @@ HLModule::AddResourceWithGlobalVariableAndProps(llvm::Constant *GV, Res->SetRW(true); Res->SetROV(RP.Basic.IsROV); Res->SetGloballyCoherent(RP.Basic.IsGloballyCoherent); + Res->SetReorderCoherent(RP.Basic.IsReorderCoherent); Res->SetHasCounter(RP.Basic.SamplerCmpOrHasCounter); Res->SetKind(RK); Res->SetGlobalSymbol(GV); diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index ab29e4bde7..3a02824b3a 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -470,6 +470,7 @@ bool IsHLSLUnsigned(clang::QualType type); bool IsHLSLMinPrecision(clang::QualType type); bool HasHLSLUNormSNorm(clang::QualType type, bool *pIsSNorm = nullptr); bool HasHLSLGloballyCoherent(clang::QualType type); +bool 
HasHLSLReorderCoherent(clang::QualType type); bool IsHLSLInputPatchType(clang::QualType type); bool IsHLSLOutputPatchType(clang::QualType type); bool IsHLSLPointStreamType(clang::QualType type); diff --git a/tools/clang/include/clang/AST/Type.h b/tools/clang/include/clang/AST/Type.h index f393f88ce9..2c96bbc295 100644 --- a/tools/clang/include/clang/AST/Type.h +++ b/tools/clang/include/clang/AST/Type.h @@ -3652,7 +3652,8 @@ class AttributedType : public Type, public llvm::FoldingSetNode { attr_hlsl_row_major, attr_hlsl_column_major, attr_hlsl_globallycoherent, - // HLSL Change Ends + attr_hlsl_reordercoherent, + // HLSL Change Ends }; private: diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 9c117fb3ce..2518423565 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -854,6 +854,12 @@ def HLSLGloballyCoherent : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLReorderCoherent : InheritableAttr { + let Spellings = [CXX11<"", "reordercoherent", 2015>]; + let Subjects = SubjectList<[Var, Function]>; + let Documentation = [Undocumented]; +} + def HLSLShader : InheritableAttr { let Spellings = [CXX11<"", "shader", 2017>]; let Args = [StringArgument<"stage">]; // one of compute, pixel, vertex, hull, domain, geometry, node diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 4f4dc28a4c..21a1b707c6 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7706,8 +7706,10 @@ def err_hlsl_varmodifierna : Error< "%0 is not a valid modifier for a %1">; def err_hlsl_varmodifierna_decltype : Error< "%0 is not a valid modifier for a declaration of type %1">; -def note_hlsl_globallycoherent_applies_to : Note< - "'globallycoherent' can only be applied to UAV or RWDispatchNodeInputRecord objects">; +def note_hlsl_coherence_applies_to : Note< + "'%select{reordercoherent|globallycoherent}0' can only be applied to UAV%select{| or RWDispatchNodeInputRecord}0 objects">; +def warn_hlsl_gc_implies_rc_attribute : Warning< + "attribute 'reordercoherent' implied by 'globallycoherent' in %0. 
'reordercoherent' ignored.">; def err_hlsl_varmodifiersna : Error< "%0 and %1 cannot be used together for a %2">; def err_hlsl_vla : Error< // Patterened after err_opencl_vla @@ -7756,9 +7758,17 @@ def warn_hlsl_semantic_attribute_position_misuse_hint: Warning< def warn_hlsl_unary_negate_unsigned : Warning< "unary negate of unsigned value is still unsigned">, InGroup, DefaultWarn; -def warn_hlsl_impcast_glc_mismatch : Warning< - "implicit conversion from %0 to %1 %select{loses|adds}2 globallycoherent annotation">, - InGroup, DefaultWarn; +def warn_hlsl_impcast_coherence_mismatch : Warning< + "implicit conversion from %0 to %1 %select{" + "demotes globallycoherent to reordercoherent|" + "promotes reordercoherent to globallycoherent|" + "loses reordercoherent|" + "loses globallycoherent|" + "adds reordercoherent|" + "adds globallycoherent}2 annotation">, + InGroup; +def warn_hlsl_glc_implies_rdc : Warning< + "attribute 'globallycoherent' implies 'reordercoherent'">, InGroup; def warn_hlsl_narrowing : Warning< "conversion from larger type %0 to smaller type %1, possible loss of data">, InGroup, DefaultWarn; diff --git a/tools/clang/include/clang/Basic/TokenKinds.def b/tools/clang/include/clang/Basic/TokenKinds.def index 2267b12b74..6933c965cf 100644 --- a/tools/clang/include/clang/Basic/TokenKinds.def +++ b/tools/clang/include/clang/Basic/TokenKinds.def @@ -508,6 +508,7 @@ KEYWORD(lineadj , KEYHLSL) KEYWORD(triangle , KEYHLSL) KEYWORD(triangleadj , KEYHLSL) KEYWORD(globallycoherent , KEYHLSL) +KEYWORD(reordercoherent , KEYHLSL) KEYWORD(interface , KEYHLSL) KEYWORD(sampler_state , KEYHLSL) KEYWORD(technique , KEYHLSL) diff --git a/tools/clang/include/clang/Sema/Sema.h b/tools/clang/include/clang/Sema/Sema.h index 42ab80b617..755c7e0755 100644 --- a/tools/clang/include/clang/Sema/Sema.h +++ b/tools/clang/include/clang/Sema/Sema.h @@ -3804,9 +3804,8 @@ class Sema { bool CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation Loc, UnaryExprOrTypeTrait ExprKind); void DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A); - void DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc); + void DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc); void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto); void DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index ac6e08b3fa..59d99ab4c5 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -203,7 +203,8 @@ void Indent(unsigned int Indentation, llvm::raw_ostream &Out); void GetHLSLAttributedTypes(clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, const clang::AttributedType **ppNorm, - const clang::AttributedType **ppGLC); + const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC); bool IsMatrixType(clang::Sema *self, clang::QualType type); bool IsVectorType(clang::Sema *self, clang::QualType type); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index d853125954..5b19e064a3 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -278,6 +278,18 @@ bool HasHLSLGloballyCoherent(clang::QualType type) { return false; } +bool HasHLSLReorderCoherent(clang::QualType type) { + const AttributedType *AT = type->getAs(); + while (AT) { + 
AttributedType::Kind kind = AT->getAttrKind(); + if (kind == AttributedType::attr_hlsl_reordercoherent) + return true; + AT = AT->getLocallyUnqualifiedSingleStepDesugaredType() + ->getAs(); + } + return false; +} + /// Checks whether the pAttributes indicate a parameter is inout or out; if /// inout, pIsIn will be set to true. bool IsParamAttributedAsOut(clang::AttributeList *pAttributes, bool *pIsIn); diff --git a/tools/clang/lib/AST/Type.cpp b/tools/clang/lib/AST/Type.cpp index 06db4747ff..51c20218cc 100644 --- a/tools/clang/lib/AST/Type.cpp +++ b/tools/clang/lib/AST/Type.cpp @@ -2945,6 +2945,7 @@ bool AttributedType::isHLSLTypeSpec() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: + case attr_hlsl_reordercoherent: return true; } llvm_unreachable("invalid attr kind"); @@ -2975,7 +2976,8 @@ bool AttributedType::isCallingConv() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: - // HLSL Change Ends + case attr_hlsl_reordercoherent: + // HLSL Change Ends return false; case attr_pcs: diff --git a/tools/clang/lib/AST/TypePrinter.cpp b/tools/clang/lib/AST/TypePrinter.cpp index 621e1d46a0..ca9e15bfd7 100644 --- a/tools/clang/lib/AST/TypePrinter.cpp +++ b/tools/clang/lib/AST/TypePrinter.cpp @@ -1174,6 +1174,9 @@ void TypePrinter::printAttributedBefore(const AttributedType *T, case AttributedType::attr_hlsl_globallycoherent: OS << "globallycoherent "; break; + case AttributedType::attr_hlsl_reordercoherent: + OS << "reordercoherent "; + break; default: // Only HLSL attribute types are covered. break; diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index b041db95a7..16ddeaec60 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -300,7 +300,7 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { clang::QualType QaulTy) override; void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) override; - const clang::Expr *CheckReturnStmtGLCMismatch( + const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) @@ -2803,16 +2803,20 @@ void CGMSHLSLRuntime::MarkPotentialResourceTemp(CodeGenFunction &CGF, AddValToPropertyMap(V, QualTy); } -static bool isGLCMismatch(QualType Ty0, QualType Ty1, const Expr *SrcExp, - clang::SourceLocation Loc, DiagnosticsEngine &Diags) { - if (HasHLSLGloballyCoherent(Ty0) == HasHLSLGloballyCoherent(Ty1)) - return false; +static std::pair getCoherenceMismatch(QualType Ty0, QualType Ty1, + const Expr *SrcExp) { + std::pair Mismatch{ + HasHLSLGloballyCoherent(Ty0) != HasHLSLGloballyCoherent(Ty1), + HasHLSLReorderCoherent(Ty0) != HasHLSLReorderCoherent(Ty1)}; + if (!Mismatch.first && !Mismatch.second) + return {false, false}; + if (const CastExpr *Cast = dyn_cast(SrcExp)) { // Skip flat conversion which is for createHandleFromHeap. 
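// Such sources come from the dynamic-resource placeholder type, where the
// coherence annotations are inferred from the destination, so reporting a
// mismatch here would be noise.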
if (Cast->getCastKind() == CastKind::CK_FlatConversion) - return false; + return {false, false}; } - return true; + return Mismatch; } void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, @@ -2829,19 +2833,23 @@ void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, AddValToPropertyMap(V, D.getType()); if (D.hasInit()) { - if (isGLCMismatch(D.getType(), D.getInit()->getType(), D.getInit(), - D.getLocation(), CGM.getDiags())) { - objectProperties.updateGLC(V); + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(D.getType(), D.getInit()->getType(), D.getInit()); + + if (glcMismatch || rdcMismatch) { + objectProperties.updateCoherence(V, glcMismatch, rdcMismatch); } } } -const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtGLCMismatch( +const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) { - if (!isGLCMismatch(RV->getType(), FnRetTy, RV, S.getReturnLoc(), - CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(RV->getType(), FnRetTy, RV); + + if (!glcMismatch && !rdcMismatch) { return RV; } const FunctionDecl *FD = cast(CGF.CurFuncDecl); @@ -2913,10 +2921,11 @@ void CGMSHLSLRuntime::addResource(Decl *D) { if (VD->hasInit() && resClass != DXIL::ResourceClass::Invalid) { if (resClass == DXIL::ResourceClass::UAV) { - if (isGLCMismatch(VD->getType(), VD->getInit()->getType(), - VD->getInit(), D->getLocation(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = getCoherenceMismatch( + VD->getType(), VD->getInit()->getType(), VD->getInit()); + if (glcMismatch || rdcMismatch) { GlobalVariable *GV = cast(CGM.GetAddrOfGlobalVar(VD)); - objectProperties.updateGLC(GV); + objectProperties.updateCoherence(GV, glcMismatch, rdcMismatch); } } return; @@ -3463,8 +3472,11 @@ bool CGMSHLSLRuntime::SetUAVSRV(SourceLocation loc, } } } + // 'globallycoherent' implies 'reordercoherent' if (HasHLSLGloballyCoherent(QualTy)) { hlslRes->SetGloballyCoherent(true); + } else if (HasHLSLReorderCoherent(QualTy)) { + hlslRes->SetReorderCoherent(true); } if (resClass == hlsl::DxilResourceBase::Class::SRV) { hlslRes->SetRW(false); @@ -3497,6 +3509,8 @@ uint32_t CGMSHLSLRuntime::AddUAVSRV(VarDecl *decl, if (decl->hasAttr()) { hlslRes->SetGloballyCoherent(true); } + if (decl->hasAttr()) + hlslRes->SetReorderCoherent(true); if (!SetUAVSRV(decl->getLocation(), resClass, hlslRes.get(), VarTy)) return 0; @@ -6140,8 +6154,9 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy)); bool bAnnotResource = false; if (isObject) { - if (isGLCMismatch(Param->getType(), Arg->getType(), Arg, - Arg->getExprLoc(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(Param->getType(), Arg->getType(), Arg); + if (glcMismatch || rdcMismatch) { // NOTE: if function is noinline, resource parameter is not allowed. // Here assume function will be always inlined. // This can only take care resource as parameter. 
When parameter is diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 532ec01458..13edadf9df 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -4034,12 +4034,17 @@ hlsl::DxilResourceProperties DxilObjectProperties::GetResource(llvm::Value *V) { return it->second; return DxilResourceProperties(); } -void DxilObjectProperties::updateGLC(llvm::Value *V) { +void DxilObjectProperties::updateCoherence(llvm::Value *V, + bool updateGloballyCoherent, + bool updateReorderCoherent) { auto it = resMap.find(V); if (it == resMap.end()) return; - it->second.Basic.IsGloballyCoherent ^= 1; + if (updateGloballyCoherent) + it->second.Basic.IsGloballyCoherent ^= 1; + if (updateReorderCoherent) + it->second.Basic.IsReorderCoherent ^= 1; } } // namespace CGHLSLMSHelper diff --git a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h index 9058ed4f6d..7fca5d4025 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h +++ b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h @@ -159,7 +159,8 @@ struct DxilObjectProperties { bool AddResource(llvm::Value *V, const hlsl::DxilResourceProperties &RP); bool IsResource(llvm::Value *V); hlsl::DxilResourceProperties GetResource(llvm::Value *V); - void updateGLC(llvm::Value *V); + void updateCoherence(llvm::Value *V, bool updateGloballyCoherent, + bool updateReorderCoherent); // MapVector for deterministic iteration order. llvm::MapVector resMap; diff --git a/tools/clang/lib/CodeGen/CGHLSLRuntime.h b/tools/clang/lib/CodeGen/CGHLSLRuntime.h index 3e27951e86..b100d93579 100644 --- a/tools/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/tools/clang/lib/CodeGen/CGHLSLRuntime.h @@ -146,7 +146,7 @@ class CGHLSLRuntime { virtual void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) = 0; - virtual const clang::Expr *CheckReturnStmtGLCMismatch( + virtual const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const clang::Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) = 0; diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 340550dbdd..1b1f593271 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -1178,8 +1178,8 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { auto MapTemp = [&](const VarDecl *LocalVD, llvm::Value *TmpArg) { OutParamScope.addTemp(LocalVD, TmpArg); }; - RV = CGM.getHLSLRuntime().CheckReturnStmtGLCMismatch(*this, RV, S, - FnRetTy, MapTemp); + RV = CGM.getHLSLRuntime().CheckReturnStmtCoherenceMismatch( + *this, RV, S, FnRetTy, MapTemp); // HLSL Change Ends. 
CharUnits Alignment = getContext().getTypeAlignInChars(RV->getType()); EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, Alignment, diff --git a/tools/clang/lib/Parse/ParseDecl.cpp b/tools/clang/lib/Parse/ParseDecl.cpp index 4ca80fcec6..59be41a484 100644 --- a/tools/clang/lib/Parse/ParseDecl.cpp +++ b/tools/clang/lib/Parse/ParseDecl.cpp @@ -3877,6 +3877,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -5321,6 +5322,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_shared: case tok::kw_groupshared: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_uniform: case tok::kw_in: case tok::kw_out: @@ -6125,6 +6127,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) { switch (Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseExpr.cpp b/tools/clang/lib/Parse/ParseExpr.cpp index 745b506468..8f51dd4b6c 100644 --- a/tools/clang/lib/Parse/ParseExpr.cpp +++ b/tools/clang/lib/Parse/ParseExpr.cpp @@ -795,6 +795,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -1740,6 +1741,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { switch (auto tk = Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseStmt.cpp b/tools/clang/lib/Parse/ParseStmt.cpp index 95dea4ab2c..6fa33d7108 100644 --- a/tools/clang/lib/Parse/ParseStmt.cpp +++ b/tools/clang/lib/Parse/ParseStmt.cpp @@ -179,6 +179,7 @@ Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: diff --git a/tools/clang/lib/Parse/ParseTentative.cpp b/tools/clang/lib/Parse/ParseTentative.cpp index 29c6e49770..6bdef3a547 100644 --- a/tools/clang/lib/Parse/ParseTentative.cpp +++ b/tools/clang/lib/Parse/ParseTentative.cpp @@ -1275,6 +1275,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw_precise: case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_indices: case tok::kw_vertices: case tok::kw_primitives: diff --git a/tools/clang/lib/Sema/SemaChecking.cpp b/tools/clang/lib/Sema/SemaChecking.cpp index 2fde458499..9e64732336 100644 --- a/tools/clang/lib/Sema/SemaChecking.cpp +++ b/tools/clang/lib/Sema/SemaChecking.cpp @@ -6772,8 +6772,8 @@ static void AnalyzeAssignment(Sema &S, BinaryOperator *E) { // Just recurse on the LHS. AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc()); - S.DiagnoseGloballyCoherentMismatch(E->getRHS(), E->getLHS()->getType(), - E->getOperatorLoc()); + S.DiagnoseCoherenceMismatch(E->getRHS(), E->getLHS()->getType(), + E->getOperatorLoc()); // We want to recurse on the RHS as normal unless we're assigning to // a bitfield. 
@@ -6887,7 +6887,7 @@ void CheckImplicitArgumentConversions(Sema &S, CallExpr *TheCall, ++ArgIdx, ++ParmIdx) { ParmVarDecl *PD = FD->getParamDecl(ParmIdx); Expr *CurrA = TheCall->getArg(ArgIdx); - S.DiagnoseGloballyCoherentMismatch(CurrA, PD->getType(), CC); + S.DiagnoseCoherenceMismatch(CurrA, PD->getType(), CC); } } // HLSL CHange End diff --git a/tools/clang/lib/Sema/SemaDecl.cpp b/tools/clang/lib/Sema/SemaDecl.cpp index 06bdeb491a..e09bf4623c 100644 --- a/tools/clang/lib/Sema/SemaDecl.cpp +++ b/tools/clang/lib/Sema/SemaDecl.cpp @@ -9167,9 +9167,10 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, // HLSL Change begin // When initializing an HLSL resource type we should diagnose mismatches in - // globally coherent annotations _unless_ the source is a dynamic resource - // placeholder type where we safely infer the globallycoherent annotaiton. - DiagnoseGloballyCoherentMismatch(Init, DclT, Init->getExprLoc()); + // globally and reorder coherent annotations _unless_ the source is a dynamic + // resource placeholder type where we safely infer the coherence + // annotations. + DiagnoseCoherenceMismatch(Init, DclT, Init->getExprLoc()); // HLSL Change end // Expressions default to 'id' when we're in a debugger diff --git a/tools/clang/lib/Sema/SemaDeclAttr.cpp b/tools/clang/lib/Sema/SemaDeclAttr.cpp index 723900cd07..085874a0ed 100644 --- a/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -5105,6 +5105,17 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D, for (const AttributeList* l = AttrList; l; l = l->getNext()) ProcessDeclAttribute(*this, S, D, *l, IncludeCXX11Attributes); + // HLSL Change Starts - Warn of redundant reorder / globally coherent + // attributes + if (D->hasAttr() && + D->hasAttr()) { + Diag(AttrList->getLoc(), diag::warn_hlsl_gc_implies_rc_attribute) + << cast(D); + D->dropAttr(); + return; + } + // HLSL Change Ends + // FIXME: We should be able to handle these cases in TableGen. 
// GCC accepts // static int a9 __attribute__((weakref)); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 72dd6d41aa..2bd4462f2f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -13674,8 +13674,9 @@ ValidateMaxRecordsSharedWithAttributes(Sema &S, Decl *D, void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { HLSLExternalSource *ExtSource = HLSLExternalSource::FromSema(this); - if (const HLSLGloballyCoherentAttr *HLSLGCAttr = - dyn_cast(A)) { + const bool IsGCAttr = isa(A); + const bool IsRCAttr = isa(A); + if (IsGCAttr || IsRCAttr) { const ValueDecl *TD = cast(D); if (TD->getType()->isDependentType()) return; @@ -13684,23 +13685,25 @@ void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { DeclType = FD->getReturnType(); while (DeclType->isArrayType()) DeclType = QualType(DeclType->getArrayElementTypeNoTypeQual(), 0); + const bool IsAllowedNodeIO = + IsGCAttr && + GetNodeIOType(DeclType) == DXIL::NodeIOKind::RWDispatchNodeInputRecord; + const bool IsUAV = + hlsl::GetResourceClassForType(getASTContext(), DeclType) == + hlsl::DXIL::ResourceClass::UAV; if (ExtSource->GetTypeObjectKind(DeclType) != AR_TOBJ_OBJECT || - (hlsl::GetResourceClassForType(getASTContext(), DeclType) != - hlsl::DXIL::ResourceClass::UAV && - GetNodeIOType(DeclType) != - DXIL::NodeIOKind::RWDispatchNodeInputRecord)) { + (!IsUAV && !IsAllowedNodeIO)) { Diag(A->getLocation(), diag::err_hlsl_varmodifierna_decltype) << A << DeclType->getCanonicalTypeUnqualified() << A->getRange(); - Diag(A->getLocation(), diag::note_hlsl_globallycoherent_applies_to) - << A << A->getRange(); + Diag(A->getLocation(), diag::note_hlsl_coherence_applies_to) + << (int)IsGCAttr << A << A->getRange(); } return; } } -void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc) { +void Sema::DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc) { QualType SrcTy = SrcExpr->getType(); QualType DstTy = TargetType; if (SrcTy->isArrayType() && DstTy->isArrayType()) { @@ -13712,9 +13715,39 @@ void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, GetNodeIOType(DstTy) == DXIL::NodeIOKind::RWDispatchNodeInputRecord) { bool SrcGL = hlsl::HasHLSLGloballyCoherent(SrcTy); bool DstGL = hlsl::HasHLSLGloballyCoherent(DstTy); - if (SrcGL != DstGL) - Diag(Loc, diag::warn_hlsl_impcast_glc_mismatch) - << SrcExpr->getType() << TargetType << /*loses|adds*/ DstGL; + // 'reordercoherent' attribute dropped earlier in presence of + // 'globallycoherent' + bool SrcRD = hlsl::HasHLSLReorderCoherent(SrcTy); + bool DstRD = hlsl::HasHLSLReorderCoherent(DstTy); + + enum { + NoMismatch = -1, + DemoteToRD = 0, + PromoteToGL = 1, + LosesRD = 2, + LosesGL = 3, + AddsRD = 4, + AddsGL = 5 + } MismatchType = NoMismatch; + + if (SrcGL && DstRD) + MismatchType = DemoteToRD; + else if (SrcRD && DstGL) + MismatchType = PromoteToGL; + else if (SrcRD && !DstRD) + MismatchType = LosesRD; + else if (SrcGL && !DstGL) + MismatchType = LosesGL; + else if (!SrcRD && DstRD) + MismatchType = AddsRD; + else if (!SrcGL && DstGL) + MismatchType = AddsGL; + + if (MismatchType == NoMismatch) + return; + + Diag(Loc, diag::warn_hlsl_impcast_coherence_mismatch) + << SrcExpr->getType() << TargetType << MismatchType; } } @@ -13863,6 +13896,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, declAttr = ::new (S.Context) HLSLGloballyCoherentAttr( A.getRange(), S.Context, 
A.getAttributeSpellingListIndex()); break; + case AttributeList::AT_HLSLReorderCoherent: + declAttr = ::new (S.Context) HLSLReorderCoherentAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; case AttributeList::AT_HLSLIndices: declAttr = ::new (S.Context) HLSLIndicesAttr( A.getRange(), S.Context, A.getAttributeSpellingListIndex()); @@ -14927,6 +14964,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGloballyCoherent: // Handled elsewhere + case AttributeList::AT_HLSLReorderCoherent: // Handled elsewhere break; case AttributeList::AT_HLSLUniform: if (!(isGlobal || isParameter)) { @@ -15322,15 +15360,17 @@ static QualType getUnderlyingType(QualType Type) { void hlsl::GetHLSLAttributedTypes( clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, - const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC) { + const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC) { AssignOpt(nullptr, ppMatrixOrientation); AssignOpt(nullptr, ppNorm); AssignOpt(nullptr, ppGLC); + AssignOpt(nullptr, ppRDC); // Note: we clear output pointers once set so we can stop searching QualType Desugared = getUnderlyingType(type); const AttributedType *AT = dyn_cast(Desugared); - while (AT && (ppMatrixOrientation || ppNorm || ppGLC)) { + while (AT && (ppMatrixOrientation || ppNorm || ppGLC || ppRDC)) { AttributedType::Kind Kind = AT->getAttrKind(); if (Kind == AttributedType::attr_hlsl_row_major || @@ -15350,6 +15390,11 @@ void hlsl::GetHLSLAttributedTypes( *ppGLC = AT; ppGLC = nullptr; } + } else if (Kind == AttributedType::attr_hlsl_reordercoherent) { + if (ppRDC) { + *ppRDC = AT; + ppRDC = nullptr; + } } Desugared = getUnderlyingType(AT->getEquivalentType()); @@ -15734,6 +15779,10 @@ void hlsl::CustomPrintHLSLAttr(const clang::Attr *A, llvm::raw_ostream &Out, Out << "globallycoherent "; break; + case clang::attr::HLSLReorderCoherent: + Out << "reordercoherent "; + break; + case clang::attr::HLSLIndices: Out << "indices "; break; @@ -15941,6 +15990,7 @@ bool hlsl::IsHLSLAttr(clang::attr::Kind AttrKind) { case clang::attr::HLSLNodeLocalRootArgumentsTableIndex: case clang::attr::HLSLNodeShareInputOf: case clang::attr::HLSLNodeTrackRWInputSharing: + case clang::attr::HLSLReorderCoherent: case clang::attr::VKBinding: case clang::attr::VKBuiltIn: case clang::attr::VKConstantId: diff --git a/tools/clang/lib/Sema/SemaStmt.cpp b/tools/clang/lib/Sema/SemaStmt.cpp index ce1e55bb0e..4e47a68888 100644 --- a/tools/clang/lib/Sema/SemaStmt.cpp +++ b/tools/clang/lib/Sema/SemaStmt.cpp @@ -3184,7 +3184,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // HLSL Change begin - Diagnose mismatched globallycoherent attrs on return. 
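// DiagnoseCoherenceMismatch checks both the globallycoherent and the
// reordercoherent annotation on the returned value against the declared
// return type, e.g. returning a 'reordercoherent RWByteAddressBuffer' from a
// function declared to return a plain RWByteAddressBuffer warns that the
// annotation is lost.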
if (RetValExp) - DiagnoseGloballyCoherentMismatch(RetValExp, FnRetType, ReturnLoc); + DiagnoseCoherenceMismatch(RetValExp, FnRetType, ReturnLoc); // HLSL Change end bool HasDependentReturnType = FnRetType->isDependentType(); diff --git a/tools/clang/lib/Sema/SemaType.cpp b/tools/clang/lib/Sema/SemaType.cpp index 5a8f9d13b3..ff3b0dbac7 100644 --- a/tools/clang/lib/Sema/SemaType.cpp +++ b/tools/clang/lib/Sema/SemaType.cpp @@ -4528,7 +4528,9 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) { return AttributeList::AT_HLSLColumnMajor; case AttributedType::attr_hlsl_globallycoherent: return AttributeList::AT_HLSLGloballyCoherent; - // HLSL Change Ends + case AttributedType::attr_hlsl_reordercoherent: + return AttributeList::AT_HLSLReorderCoherent; + // HLSL Change Ends } llvm_unreachable("unexpected attribute kind!"); } @@ -5771,6 +5773,7 @@ static bool isHLSLTypeAttr(AttributeList::Kind Kind) { case AttributeList::AT_HLSLSnorm: case AttributeList::AT_HLSLUnorm: case AttributeList::AT_HLSLGloballyCoherent: + case AttributeList::AT_HLSLReorderCoherent: return true; default: // Only meant to catch attr handled by handleHLSLTypeAttr, ignore the rest @@ -5802,7 +5805,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, const AttributedType *pMatrixOrientation = nullptr; const AttributedType *pNorm = nullptr; const AttributedType *pGLC = nullptr; - hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC); + const AttributedType *pRDC = nullptr; + hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC, + &pRDC); if (pMatrixOrientation && (Kind == AttributeList::AT_HLSLColumnMajor || @@ -5836,13 +5841,18 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, return true; } - if (pGLC && Kind == AttributeList::AT_HLSLGloballyCoherent) { - AttributedType::Kind CurAttrKind = pGLC->getAttrKind(); - if (Kind == getAttrListKind(CurAttrKind)) { - S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) - << Attr.getName() << Attr.getRange(); - } - } + const bool hasGLC = pGLC; + const bool addsGLC = Kind == AttributeList::AT_HLSLGloballyCoherent; + const bool hasRDC = pRDC; + const bool addsRDC = Kind == AttributeList::AT_HLSLReorderCoherent; + + const bool hasMismatchingAttrs = hasGLC && hasRDC; + const bool addsMismatchingAttr = (hasGLC && addsRDC) || (hasRDC && addsGLC); + if ((hasGLC && addsGLC) || (hasRDC && addsRDC)) + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) + << Attr.getName() << Attr.getRange(); + else if (!hasMismatchingAttrs && addsMismatchingAttr) + S.Diag(Attr.getLoc(), diag::warn_hlsl_glc_implies_rdc) << Attr.getRange(); AttributedType::Kind TAK; switch (Kind) { @@ -5853,6 +5863,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, case AttributeList::AT_HLSLSnorm: TAK = AttributedType::attr_hlsl_snorm; break; case AttributeList::AT_HLSLGloballyCoherent: TAK = AttributedType::attr_hlsl_globallycoherent; break; + case AttributeList::AT_HLSLReorderCoherent: + TAK = AttributedType::attr_hlsl_reordercoherent; + break; } Type = S.Context.getAttributedType(TAK, Type, Type); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl new file mode 100644 index 0000000000..d92ce7b9ca --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: %[[uH:[^ ]+]] = 
load %dx.types.Handle, %dx.types.Handle* @"\01?u@@3V?$RWBuffer@M@@A", align 4 +// CHECK: %[[uLIBH:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %[[uH]]) ; CreateHandleForLib(Resource) +// CHECK: %[[uANNOT:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[uLIBH]], %dx.types.ResourceProperties { i32 69642, i32 265 }) ; AnnotateHandle(res,props) resource: reordercoherent RWTypedBuffer +// CHECK: %{{[^ ]+}} = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %[[uANNOT]], i32 0, i32 undef) ; BufferLoad(srv,index,wot) + +RWBuffer<float> OutBuf : register(u1); +reordercoherent RWBuffer<float> u : register(u2); + +float read(RWBuffer<float> buf) { + return buf[0]; +} + +[shader("raygeneration")] +void main() { + OutBuf[0] = read(u); +}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl new file mode 100644 index 0000000000..ea47281d0d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: !"uav1", {{.+}}, ![[TAGMD:[0-9]+]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true + +reordercoherent RWTexture1D<float> uav1 : register(u3); +RWBuffer<float> uav2; + +[shader("raygeneration")] +void main() +{ + reordercoherent RWTexture1D<float> uav3 = uav1; + uav3[0] = 5; + uav1[0] = 2; + uav2[1] = 3; +}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl new file mode 100644 index 0000000000..8b60c0cd67 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// Make sure uav array can have reordercoherent.
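+// The array binds as a single resource record, so one extended-property node
+// carrying the reordercoherent tag (i32 4, i1 true) is expected alongside the
+// element-type tag checked below.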
+// CHECK: !{{.*}} = !{i32 1, [12 x %"class.RWTexture2D<float>"]* bitcast ([12 x %dx.types.Handle]* @"\01?tex@@3PAV?$RWTexture2D@M@@A" to [12 x %"class.RWTexture2D<float>"]*), !"tex", i32 0, i32 2, i32 12, i32 2, i1 false, i1 false, i1 false, ![[TAGMD:.*]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true} + + +RWBuffer<float> OutBuf: register(u1); +reordercoherent RWTexture2D<float> tex[12] : register(u2); + +[shader("raygeneration")] +void main() { + int2 c = DispatchRaysIndex().xy; + OutBuf[0] = tex[0][c]; +}
diff --git a/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl new file mode 100644 index 0000000000..53366de828 --- /dev/null +++ b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: |-VarDecl {{.*}} used uav1 'reordercoherent RWTexture1D<float>':'RWTexture1D<vector<float, 1> >' +// CHECK-NEXT: | |-HLSLReorderCoherentAttr +reordercoherent RWTexture1D<float> uav1 : register(u3); +RWBuffer<float> uav2; + +[shader("raygeneration")] +void main() +{ + // CHECK: | `-VarDecl {{.*}} uav3 'reordercoherent RWTexture1D<float>':'RWTexture1D<vector<float, 1> >' cinit + // CHECK-NEXT: | | + // CHECK-NEXT: | | + // CHECK-NEXT: | `-HLSLReorderCoherentAttr + reordercoherent RWTexture1D<float> uav3 = uav1; +}
diff --git a/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl new file mode 100644 index 0000000000..0192154b78 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl @@ -0,0 +1,96 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonCBuf; +globallycoherent RWByteAddressBuffer GCBuf; +reordercoherent RWByteAddressBuffer RCBuf; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' implied by 'globallycoherent' in 'RCGCBuf'. 
'reordercoherent' ignored.}} +reordercoherent globallycoherent RWByteAddressBuffer RCGCBuf; + +globallycoherent RWByteAddressBuffer getPromoteRC() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGC() { + return GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer GCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +reordercoherent RWByteAddressBuffer RCBufMultiArr[2][2]; +globallycoherent RWByteAddressBuffer GCBufMultiArr[2][2]; + +globallycoherent RWByteAddressBuffer getPromoteRCArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCArr() { + return GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getPromoteRCMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCMultiArr() { + return GCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void RCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(globallycoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getPromoteToGCParam(inout globallycoherent RWByteAddressBuffer PGCBuf) { + PGCBuf = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer __restrict' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParam(inout reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' demotes globallycoherent to reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCDemoteBufArr[2] = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr0[2] = GCBufMultiArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr1[2][2] = GCBufMultiArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' demotes globallycoherent to 
reordercoherent annotation}} + +static globallycoherent RWByteAddressBuffer SRCPromoteBufArr[2] = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr0[2] = RCBufMultiArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr1[2][2] = RCBufMultiArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2][2]' to 'globallycoherent RWByteAddressBuffer [2][2]' promotes reordercoherent to globallycoherent annotation}} + +void getPromoteToGCParamArr(inout globallycoherent RWByteAddressBuffer PGCBufArr[2]) { + PGCBufArr = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer __restrict[2]' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParamArr(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getGCBuf() { + return GCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +[shader("raygeneration")] +void main() +{ + GCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + RCStore(GCBuf); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGCReturn = getGCBuf(); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC0 = GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to 
globallycoherent annotation}} +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl new file mode 100644 index 0000000000..130b0efee7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -E main -T lib_6_9 -verify %s +// REQUIRES: dxil-1-9 + +using Ty = RWTexture1D; + +using GTy = globallycoherent Ty; +using RTy = reordercoherent Ty; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGTy = globallycoherent GTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRTy = reordercoherent RTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy = globallycoherent RTy; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using RGTy = reordercoherent GTy; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy = globallycoherent GRTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy = reordercoherent RGTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy2 = globallycoherent reordercoherent Ty; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using RGTy2 = reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy2 = globallycoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GRGTy2 = globallycoherent reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RGRTy2 = reordercoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy2 = reordercoherent reordercoherent globallycoherent Ty; diff --git a/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl new file mode 100644 index 0000000000..447e496c6e --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonRCBuf; +reordercoherent RWByteAddressBuffer RCBuf; + +RWByteAddressBuffer NonRCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +RWByteAddressBuffer NonRCBufMultiArr[2][2]; +reordercoherent RWByteAddressBuffer RCBufMultiArr[2][2]; + +RWByteAddressBuffer getNonRCBuf() { + return NonRCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +RWByteAddressBuffer getNonRCBufArr() { + return NonRCBufArr[0]; +} + +reordercoherent RWByteAddressBuffer getRCBufArr() { + return RCBufArr[0]; +} + +RWByteAddressBuffer getNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; +} + +reordercoherent RWByteAddressBuffer getRCBufMultiArr() { + return RCBufMultiArr[0][0]; +} + +RWByteAddressBuffer getNonGCRCBuf() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent 
annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBuf() { + return NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufArr() { + return NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getNonRCBufPAram(inout reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' adds reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCBufArr[2] = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr0[2] = NonRCBufMultiArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr1[2][2] = NonRCBufMultiArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' adds reordercoherent annotation}} + +void getNonRCBufArrParam(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' adds reordercoherent annotation}} +} + +[shader("raygeneration")] void main() { + NonGCStore(NonRCBuf); // No diagnostic + GCStore(NonRCBuf); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + NonGCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + GCStore(RCBuf); // No diagnostic + + RWByteAddressBuffer NonGCCopyNonGC = NonRCBuf; // No diagnostic + RWByteAddressBuffer NonGCCopyGC = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC = NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC = RCBuf; // No diagnostic + + reordercoherent 
RWByteAddressBuffer GCCopyNonGCReturn = getNonRCBuf(); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyGCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyNonGC0 = NonRCBufArr[0]; // No diagnostic + RWByteAddressBuffer NonGCCopyGC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC0 = NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC0 = RCBufArr[0]; // No diagnostic +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl new file mode 100644 index 0000000000..57fd33fb13 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +reordercoherent RWTexture1D uav1 : register(u3); + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'Buffer >'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent Buffer srv; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float m; + +reordercoherent RWTexture2D tex[12]; +reordercoherent RWTexture2D texMD[12][12]; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float One() { + return 1.0; +} + +struct Record { uint index; }; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'RWDispatchNodeInputRecord'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +void func2(reordercoherent RWDispatchNodeInputRecord funcInputData) {} diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp index 3af305d52a..16d8b1dadd 100644 --- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp @@ -1220,6 +1220,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, bool bUAV = RP.isUAV(); LPCSTR RW = bUAV ? (RP.Basic.IsROV ? "ROV" : "RW") : ""; LPCSTR GC = bUAV && RP.Basic.IsGloballyCoherent ? "globallycoherent " : ""; + LPCSTR RC = bUAV && RP.Basic.IsReorderCoherent ? "reordercoherent " : ""; LPCSTR COUNTER = bUAV && RP.Basic.SamplerCmpOrHasCounter ? 
", counter" : ""; switch (RP.getResourceKind()) { @@ -1233,7 +1234,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << "<"; if (RP.Typed.CompCount > 1) OS << std::to_string(RP.Typed.CompCount) << "x"; @@ -1241,11 +1242,11 @@ void PrintResourceProperties(DxilResourceProperties &RP, break; case DXIL::ResourceKind::RawBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); break; case DXIL::ResourceKind::StructuredBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << ""; break; diff --git a/tools/clang/unittests/HLSL/DxilContainerTest.cpp b/tools/clang/unittests/HLSL/DxilContainerTest.cpp index a1533ae19f..339b33c655 100644 --- a/tools/clang/unittests/HLSL/DxilContainerTest.cpp +++ b/tools/clang/unittests/HLSL/DxilContainerTest.cpp @@ -1454,6 +1454,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { "ConsumeStructuredBuffer consume_buf;" "RasterizerOrderedByteAddressBuffer rov_buf;" "globallycoherent RWByteAddressBuffer gc_buf;" + "reordercoherent RWByteAddressBuffer rc_buf;" "float function_import(float x);" "export float function0(min16float x) { " " return x + 1 + tex[0].x; }" @@ -1465,6 +1466,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { " f.f2 += 0.5; append_buf.Append(f);" " rov_buf.Store(i, f.i2.x);" " gc_buf.Store(i, f.i2.y);" + " rc_buf.Store(i, f.i2.y);" " b_buf.Store(i, f.i2.x + f.i2.y); }"; CComPtr pCompiler; CComPtr pSource; @@ -1477,7 +1479,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::DXIL::ResourceKind kind; hlsl::RDAT::DxilResourceFlag flag; }; - const unsigned numResFlagCheck = 5; + const unsigned numResFlagCheck = 6; CheckResFlagInfo resFlags[numResFlagCheck] = { {"b_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::None}, @@ -1487,6 +1489,8 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::RDAT::DxilResourceFlag::UAVCounter}, {"gc_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVGloballyCoherent}, + {"rc_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVReorderCoherent}, {"rov_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVRasterizerOrderedView}}; @@ -1575,7 +1579,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { IFTBOOLMSG(false, E_FAIL, "unknown function name"); } } - VERIFY_ARE_EQUAL(resTable.Count(), 8U); + VERIFY_ARE_EQUAL(resTable.Count(), 9U); } } IFTBOOLMSG(blobFound, E_FAIL, "failed to find RDAT blob after compiling"); From bc9044adc7356896eeb1f37a3846f4fef8ed241e Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 10 Apr 2025 18:21:30 +0200 Subject: [PATCH 77/88] [SER] REORDER_SCOPE Barrier semantic flag (#7263) - HLSL REORDER_SCOPE flag (available from SM6.9) - Make validator accept REORDER_SCOPE from DXIL 1.9 --- include/dxc/DXIL/DxilConstants.h | 4 +- include/dxc/DXIL/DxilOperations.h | 1 + lib/DXIL/DxilOperations.cpp | 40 ++++++++++- lib/DxilValidation/DxilValidation.cpp | 16 +++-- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- tools/clang/lib/AST/ASTContextHLSL.cpp | 6 +- tools/clang/lib/Sema/SemaHLSL.cpp | 22 ++++-- 
.../rdat_mintarget/sm69_barriers.hlsl | 53 +++++++++++++++ .../ser_reorder_scope_sm69_passing.ll | 68 +++++++++++++++++++ .../reorder_scope_sm68_unavailable.hlsl | 8 +++ .../barrier/reorder_scope_sm69_passing.hlsl | 12 ++++ 11 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 4f8c521851..2c1d309650 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1905,7 +1905,9 @@ enum class BarrierSemanticFlag : uint32_t { GroupSync = 0x00000001, // GROUP_SYNC GroupScope = 0x00000002, // GROUP_SCOPE DeviceScope = 0x00000004, // DEVICE_SCOPE - ValidMask = 0x00000007, + LegacyFlags = 0x00000007, + ReorderScope = 0x00000008, // REORDER_SCOPE + ValidMask = 0x0000000F, GroupFlags = GroupSync | GroupScope, }; diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 05021ce789..c8b6762b3f 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -151,6 +151,7 @@ class OP { static bool IsDxilOpBarrier(OpCode C); static bool BarrierRequiresGroup(const llvm::CallInst *CI); static bool BarrierRequiresNode(const llvm::CallInst *CI); + static bool BarrierRequiresReorder(const llvm::CallInst *CI); static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI); static void GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, unsigned &major, unsigned &minor, diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 7047d9fe59..786d4a5ef6 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -10,6 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilOperations.h" +#include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilInstructions.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/Support/Global.h" @@ -3024,6 +3025,30 @@ bool OP::BarrierRequiresNode(const llvm::CallInst *CI) { } } +bool OP::BarrierRequiresReorder(const llvm::CallInst *CI) { + OpCode Opcode = OP::GetDxilOpFuncCallInst(CI); + switch (Opcode) { + case OpCode::BarrierByMemoryType: { + DxilInst_BarrierByMemoryType Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + case OpCode::BarrierByMemoryHandle: { + DxilInst_BarrierByMemoryHandle Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + default: + return false; + } +} + DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { OpCode opcode = OP::GetDxilOpFuncCallInst(CI); switch (opcode) { @@ -3046,6 +3071,12 @@ DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { semanticFlags = barrier.get_SemanticFlags_val(); } + // Disallow SM6.9+ semantic flags. 
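+  // TranslateToBarrierMode maps the barrier ops back onto the legacy
+  // BarrierMode encoding, which only covers the LegacyFlags set (GROUP_SYNC,
+  // GROUP_SCOPE, DEVICE_SCOPE). REORDER_SCOPE and any other SM6.9+ semantic
+  // flag has no legacy equivalent, so the translation is rejected as Invalid
+  // rather than silently dropping the flag.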
+ if (semanticFlags & + ~static_cast(DXIL::BarrierSemanticFlag::LegacyFlags)) { + return DXIL::BarrierMode::Invalid; + } + // Mask to legacy flags, if allowed. memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed( memoryTypeFlags, (unsigned)DXIL::MemoryTypeFlag::LegacyFlags); @@ -3467,10 +3498,17 @@ void OP::GetMinShaderModelAndMask(const llvm::CallInst *CI, minor = 8; } } + if (BarrierRequiresReorder(CI)) { + major = 6; + minor = 9; + mask &= SFLAG(Library) | SFLAG(RayGeneration); + return; + } if (BarrierRequiresNode(CI)) { mask &= SFLAG(Library) | SFLAG(Node); return; - } else if (BarrierRequiresGroup(CI)) { + } + if (BarrierRequiresGroup(CI)) { mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Node); return; diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index a788f21d4e..aa7bb398fa 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1628,6 +1628,15 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } } +static unsigned getSemanticFlagValidMask(const ShaderModel *pSM) { + unsigned DxilMajor, DxilMinor; + pSM->GetDxilVersion(DxilMajor, DxilMinor); + // DXIL version >= 1.9 + if (hlsl::DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 9) < 0) + return static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + return static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); +} + static void ValidateDxilOperationCallInProfile(CallInst *CI, DXIL::OpCode Opcode, const ShaderModel *pSM, @@ -1838,8 +1847,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, (unsigned)hlsl::DXIL::MemoryTypeFlag::ValidMask, "memory type", "BarrierByMemoryType"); ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", "BarrierByMemoryType"); + getSemanticFlagValidMask(pSM), "semantic", + "BarrierByMemoryType"); if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); @@ -1855,8 +1864,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", OpName); + getSemanticFlagValidMask(pSM), "semantic", OpName); if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 21a1b707c6..6254e5fc71 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7986,7 +7986,7 @@ def err_hlsl_barrier_invalid_memory_flags: Error< "UAV_MEMORY, GROUP_SHARED_MEMORY, NODE_INPUT_MEMORY, NODE_OUTPUT_MEMORY flags">; def err_hlsl_barrier_invalid_semantic_flags: Error< "invalid SemanticFlags for Barrier operation; expected 0 or some combination of " - "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags">; + "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE%select{|, REORDER_SCOPE}0 flags">; def warn_hlsl_barrier_group_memory_requires_group: Warning< "GROUP_SHARED_MEMORY specified for Barrier operation when context has no visible group">, InGroup, DefaultError; diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp 
index c7a031a219..2c3c20546f 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -718,6 +718,8 @@ void hlsl::AddSamplerFeedbackConstants(ASTContext &context) { /// Adds all enums for Barrier intrinsic void hlsl::AddBarrierConstants(ASTContext &context) { + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "MEMORY_TYPE_FLAG", {{"UAV_MEMORY", (unsigned)DXIL::MemoryTypeFlag::UavMemory}, @@ -730,7 +732,9 @@ void hlsl::AddBarrierConstants(ASTContext &context) { context, "BARRIER_SEMANTIC_FLAG", {{"GROUP_SYNC", (unsigned)DXIL::BarrierSemanticFlag::GroupSync}, {"GROUP_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::GroupScope}, - {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}}); + {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}, + {"REORDER_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::ReorderScope, + ConstructAvailabilityAttribute(context, VT69)}}); } static Expr *IntConstantAsBoolExpr(clang::Sema &sema, uint64_t value) { diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 2bd4462f2f..5236a1e3c4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -11576,7 +11576,8 @@ static bool CheckFinishedCrossGroupSharingCall(Sema &S, CXXMethodDecl *MD, return false; } -static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { +static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { DXASSERT(FD->getNumParams() == 2, "otherwise, unknown Barrier overload"); // Emit error when MemoryTypeFlags are known to be invalid. @@ -11606,12 +11607,18 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { llvm::APSInt SemanticFlagsVal; if (SemanticFlagsExpr->isIntegerConstantExpr(SemanticFlagsVal, S.Context)) { SemanticFlags = SemanticFlagsVal.getLimitedValue(); - if ((uint32_t)SemanticFlags & - ~(uint32_t)DXIL::BarrierSemanticFlag::ValidMask) { + uint32_t ValidMask = 0U; + if (SM->IsSM69Plus()) { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); + } else { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + } + if ((uint32_t)SemanticFlags & ~ValidMask) { S.Diags.Report(SemanticFlagsExpr->getExprLoc(), diag::err_hlsl_barrier_invalid_semantic_flags) - << (uint32_t)SemanticFlags - << (uint32_t)DXIL::BarrierSemanticFlag::ValidMask; + << SM->IsSM69Plus(); return true; } } @@ -11654,6 +11661,9 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, if (!IsBuiltinTable(IntrinsicAttr->getGroup())) return; + const auto *SM = + hlsl::ShaderModel::GetByName(getLangOpts().HLSLProfile.c_str()); + hlsl::IntrinsicOp opCode = (hlsl::IntrinsicOp)IntrinsicAttr->getOpcode(); switch (opCode) { case hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing: @@ -11661,7 +11671,7 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, TheCall->getLocStart()); break; case hlsl::IntrinsicOp::IOP_Barrier: - CheckBarrierCall(*this, FDecl, TheCall); + CheckBarrierCall(*this, FDecl, TheCall, SM); break; #ifdef ENABLE_SPIRV_CODEGEN case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl new file mode 100644 index 0000000000..6cedf44e20 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl @@ -0,0 +1,53 @@ +// 
RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | %D3DReflect %s | %FileCheck %s -check-prefixes=RDAT + +// Check that stage flags are set correctly still for different barrier modes in SM 6.9. + +// RDAT: FunctionTable[{{.*}}] = { + +RWByteAddressBuffer BAB : register(u1, space0); + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder() { + Barrier(UAV_MEMORY, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder2() { + Barrier(BAB, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call() { + fn_barrier_reorder(); + BAB.Store(0, 0); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call2() { + fn_barrier_reorder2(); + BAB.Store(0, 0); +} diff --git a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll new file mode 100644 index 0000000000..cab9942b02 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll @@ -0,0 +1,68 @@ +; RUN: %dxilver 1.9 | %dxv %s + +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; BAB UAV byte r/w U0 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?BAB@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A", align 4 + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) ; BarrierByMemoryType(MemoryTypeFlags,SemanticFlags) + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8) ; BarrierByMemoryHandle(object,SemanticFlags) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32) #1 + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryHandle(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle 
@dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #3 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!5} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"BAB", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!5 = !{i32 1, void ()* @"\01?main@@YAXXZ", !6} +!6 = !{!7} +!7 = !{i32 1, !8, !8} +!8 = !{} +!9 = !{null, !"", null, !2, !10} +!10 = !{i32 0, i64 8589934608} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl new file mode 100644 index 0000000000..fc42f99a9a --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -Tlib_6_8 -verify %s + +[Shader("compute")] +[numthreads(1, 1, 1)] +void main() { + // expected-error@+1{{invalid SemanticFlags for Barrier operation; expected 0 or some combination of GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags}} + Barrier(0, REORDER_SCOPE); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl new file mode 100644 index 0000000000..18271a2b11 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s + +RWByteAddressBuffer BAB : register(u1, space0); + +[shader("raygeneration")] +void main() { +// CHECK: call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) + Barrier(UAV_MEMORY, REORDER_SCOPE); + +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %{{[^ ]+}}, i32 8) + Barrier(BAB, REORDER_SCOPE); +} From 0168df12c28e1f088fd713d550a82cb35a34f89c Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Sat, 12 Apr 2025 03:07:08 +0200 Subject: [PATCH 78/88] [SER] HitObject_FromRayQuery[WithAttrs] DXIL opcodes and check-pass tests (#7277) Add the DXIL operations and a passing validation test for: - HitObject_FromRayQuery - HitObject_FromRayQueryWithAttrs DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 11 ++- include/dxc/DXIL/DxilInstructions.h | 63 ++++++++++++++ lib/DXIL/DxilOperations.cpp | 56 +++++++------ .../ser_hitobject_fromrayquery_passing.ll | 84 +++++++++++++++++++ utils/hct/hctdb.py | 46 +++++++++- 5 files changed, 230 insertions(+), 30 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 2c1d309650..9c71eb329e 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -503,7 +503,6 @@ enum class OpCode : unsigned { ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved ReservedB0 = 262, // reserved - ReservedB1 = 263, // reserved ReservedB10 = 272, // reserved ReservedB11 = 273, // reserved 
ReservedB12 = 274, // reserved @@ -514,7 +513,6 @@ enum class OpCode : unsigned { ReservedB17 = 279, // reserved ReservedB18 = 280, // reserved ReservedB19 = 281, // reserved - ReservedB2 = 264, // reserved ReservedB20 = 282, // reserved ReservedB21 = 283, // reserved ReservedB22 = 284, // reserved @@ -916,6 +914,11 @@ enum class OpCode : unsigned { // operation with a mipmap-level offset // Shader Execution Reordering + HitObject_FromRayQuery = 263, // Creates a new HitObject representing a + // committed hit from a RayQuery + HitObject_FromRayQueryWithAttrs = + 264, // Creates a new HitObject representing a committed hit from a + // RayQuery and committed attributes HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss HitObject_MakeNop = 266, // Creates an empty nop HitObject @@ -1294,6 +1297,8 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackLevel, // Shader Execution Reordering + HitObject_FromRayQuery, + HitObject_FromRayQueryWithAttrs, HitObject_MakeMiss, HitObject_MakeNop, @@ -1361,7 +1366,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 179 // exclusive last value of enumeration + NumOpClasses = 181 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 6ee22869a5..15f7a1362b 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8850,6 +8850,69 @@ struct DxilInst_AllocateRayQuery2 { } }; +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery +struct DxilInst_HitObject_FromRayQuery { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_FromRayQuery(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQuery); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery and committed attributes +struct DxilInst_HitObject_FromRayQueryWithAttrs { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_FromRayQueryWithAttrs(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQueryWithAttrs); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + arg_HitKind = 2, + arg_CommittedAttribs = 3, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_HitKind() const { return Instr->getOperand(2); } + void 
set_HitKind(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_CommittedAttribs() const { return Instr->getOperand(3); } + void set_CommittedAttribs(llvm::Value *val) { Instr->setOperand(3, val); } +}; + /// This instruction Creates a new HitObject representing a miss struct DxilInst_HitObject_MakeMiss { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 786d4a5ef6..7945197eba 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2311,24 +2311,24 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - {OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - Attribute::None, + + // Shader Execution Reordering + {OC::HitObject_FromRayQuery, + "HitObject_FromRayQuery", + OCC::HitObject_FromRayQuery, + "hitObject_FromRayQuery", + Attribute::ReadOnly, 0, {}, {}}, // Overloads: v - - // Shader Execution Reordering + {OC::HitObject_FromRayQueryWithAttrs, + "HitObject_FromRayQueryWithAttrs", + OCC::HitObject_FromRayQueryWithAttrs, + "hitObject_FromRayQueryWithAttrs", + Attribute::ReadOnly, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u {OC::HitObject_MakeMiss, "HitObject_MakeMiss", OCC::HitObject_MakeMiss, @@ -3446,8 +3446,10 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } - // Instructions: HitObject_MakeMiss=265, HitObject_MakeNop=266 - if ((265 <= op && op <= 266)) { + // Instructions: HitObject_FromRayQuery=263, + // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, + // HitObject_MakeNop=266 + if ((263 <= op && op <= 266)) { major = 6; minor = 9; mask = @@ -5622,16 +5624,20 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB1: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_FromRayQuery: + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB2: - A(pV); + case OpCode::HitObject_FromRayQueryWithAttrs: + A(pHit); + A(pI32); + A(pI32); A(pI32); + A(udt); break; - - // Shader Execution Reordering case OpCode::HitObject_MakeMiss: A(pHit); A(pI32); @@ -5997,6 +6003,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return nullptr; return FT->getParamType(15); case OpCode::ReportHit: + case OpCode::HitObject_FromRayQueryWithAttrs: if (FT->getNumParams() <= 3) return nullptr; return FT->getParamType(3); @@ -6080,8 +6087,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ReservedA1: case OpCode::ReservedA2: case OpCode::ReservedB0: - case OpCode::ReservedB1: - case OpCode::ReservedB2: + case OpCode::HitObject_FromRayQuery: case OpCode::HitObject_MakeMiss: case OpCode::HitObject_MakeNop: case OpCode::ReservedB5: diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll new file mode 100644 index 0000000000..5b0c65fd6b --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll @@ -0,0 +1,84 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. 
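+; The module below allocates a RayQuery (dx.op.allocateRayQuery), traces a ray
+; inline (dx.op.rayQuery_TraceRayInline), and then constructs HitObjects from
+; the committed hit with dx.op.hitObject_FromRayQuery (opcode 263) and
+; dx.op.hitObject_FromRayQueryWithAttrs (opcode 264), the latter overloaded on
+; the %struct.CustomAttrs attribute type and passing HitKind 16.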
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%struct.CustomAttrs = type { float, float } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.CustomAttrs, align 4 + %3 = call i32 @dx.op.allocateRayQuery(i32 178, i32 5) ; AllocateRayQuery(constRayFlags) + %4 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %5 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %3, %dx.types.Handle %5, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; RayQuery_TraceRayInline(rayQueryHandle,accelerationStructure,rayFlags,instanceInclusionMask,origin_X,origin_Y,origin_Z,tMin,direction_X,direction_Y,direction_Z,tMax) + %6 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %3) ; HitObject_FromRayQuery(rayQueryHandle) + %7 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %3, i32 16, %struct.CustomAttrs* nonnull %2) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.allocateRayQuery(i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.rayQuery_TraceRayInline(i32, i32, %dx.types.Handle, i32, i32, float, float, float, float, float, float, float, float) #0 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32, i32, i32, %struct.CustomAttrs*) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!6} +!dx.dxrPayloadAnnotations = !{!10} +!dx.entryPoints = !{!13, !15} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, null, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{i32 1, void 
()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, %struct.Payload undef, !11} +!11 = !{!12} +!12 = !{i32 0, i32 8210} +!13 = !{null, !"", null, !2, !14} +!14 = !{i32 0, i64 33554432} +!15 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !16} +!16 = !{i32 8, i32 7, i32 5, !17} +!17 = !{i32 0} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index a6cc52df1a..b3b9c82528 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -848,7 +848,10 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Extended Command Information" self.name_idx[i].shader_stages = ("vertex",) self.name_idx[i].shader_model = 6, 8 - for i in ("HitObject_MakeMiss,HitObject_MakeNop").split(","): + for i in ( + "HitObject_MakeMiss,HitObject_MakeNop" + + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + ).split(","): self.name_idx[i].category = "Shader Execution Reordering" self.name_idx[i].shader_model = 6, 9 self.name_idx[i].shader_stages = ( @@ -5739,7 +5742,46 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3) + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1) + + self.add_dxil_op( + "HitObject_FromRayQuery", + next_op_idx, + "HitObject_FromRayQuery", + "Creates a new HitObject representing a committed hit from a RayQuery", + "v", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_FromRayQueryWithAttrs", + next_op_idx, + "HitObject_FromRayQueryWithAttrs", + "Creates a new HitObject representing a committed hit from a RayQuery and committed attributes", + "u", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + db_dxil_param( + 3, + "i32", + "HitKind", + "User-specified value in range of 0-127 to identify the type of hit", + ), + db_dxil_param(4, "udt", "CommittedAttribs", "Committed attributes"), + ], + ) + next_op_idx += 1 self.add_dxil_op( "HitObject_MakeMiss", From 94f9275debff15d3e57d83c60bae16055c2d60c6 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Sun, 13 Apr 2025 02:08:54 +0200 Subject: [PATCH 79/88] [SER] HitObject accessors DXIL opcodes and check-pass tests (#7276) Add the DXIL operations and a passing validation test for: - HitObject_IsMiss, HitObject_IsHit, HitObject_IsNop - HitObject_RayFlags, HitObject_RayTMin, HitObject_RayTCurrent - HitObject_GeometryIndex, HitObject_InstanceIndex, HitObject_InstanceID - HitObject_PrimitiveIndex, HitObject_HitKind, HitObject_ShaderTableIndex - HitObject_WorldRayOrigin, HitObject_WorldRayDirection, - HitObject_ObjectRayOrigin, HitObject_ObjectRayDirection - HitObject_ObjectToWorld3x4, HitObject_WorldToObject3x4 - HitObject_SetShaderTableIndex, HitObject_LoadLocalRootTableConstant - HitObject_Attributes Closes #7310 DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 58 +- include/dxc/DXIL/DxilInstructions.h | 685 ++++++++++++++++++ lib/DXIL/DxilOperations.cpp | 500 +++++++------ .../ser_hitobject_accessors_passing.ll | 110 +++ utils/hct/hctdb.py | 342 ++++++++- 5 files changed, 1446 insertions(+), 249 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll diff 
--git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 9c71eb329e..723abe552f 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -503,32 +503,11 @@ enum class OpCode : unsigned { ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved ReservedB0 = 262, // reserved - ReservedB10 = 272, // reserved - ReservedB11 = 273, // reserved - ReservedB12 = 274, // reserved - ReservedB13 = 275, // reserved - ReservedB14 = 276, // reserved - ReservedB15 = 277, // reserved - ReservedB16 = 278, // reserved - ReservedB17 = 279, // reserved - ReservedB18 = 280, // reserved - ReservedB19 = 281, // reserved - ReservedB20 = 282, // reserved - ReservedB21 = 283, // reserved - ReservedB22 = 284, // reserved - ReservedB23 = 285, // reserved - ReservedB24 = 286, // reserved - ReservedB25 = 287, // reserved - ReservedB26 = 288, // reserved - ReservedB27 = 289, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved ReservedB30 = 292, // reserved ReservedB5 = 267, // reserved ReservedB6 = 268, // reserved - ReservedB7 = 269, // reserved - ReservedB8 = 270, // reserved - ReservedB9 = 271, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved ReservedC2 = 295, // reserved @@ -914,13 +893,42 @@ enum class OpCode : unsigned { // operation with a mipmap-level offset // Shader Execution Reordering + HitObject_Attributes = 289, // Returns the attributes set for this HitObject HitObject_FromRayQuery = 263, // Creates a new HitObject representing a // committed hit from a RayQuery HitObject_FromRayQueryWithAttrs = 264, // Creates a new HitObject representing a committed hit from a // RayQuery and committed attributes + HitObject_GeometryIndex = 281, // Returns the geometry index committed on hit + HitObject_HitKind = 285, // Returns the HitKind of the hit + HitObject_InstanceID = 283, // Returns the instance id committed on hit + HitObject_InstanceIndex = 282, // Returns the instance index committed on hit + HitObject_IsHit = 270, // Returns `true` if the HitObject is a NOP-HitObject + HitObject_IsMiss = 269, // Returns `true` if the HitObject represents a miss + HitObject_IsNop = 271, // Returns `true` if the HitObject represents a nop + HitObject_LoadLocalRootTableConstant = + 288, // Returns the root table constant for this HitObject and offset HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss HitObject_MakeNop = 266, // Creates an empty nop HitObject + HitObject_ObjectRayDirection = + 278, // Returns the ray direction in object space + HitObject_ObjectRayOrigin = 277, // Returns the ray origin in object space + HitObject_ObjectToWorld3x4 = 279, // Returns the object to world space + // transformation matrix in 3x4 form + HitObject_PrimitiveIndex = + 284, // Returns the primitive index committed on hit + HitObject_RayFlags = 272, // Returns the ray flags set in the HitObject + HitObject_RayTCurrent = + 274, // Returns the current T value set in the HitObject + HitObject_RayTMin = 273, // Returns the TMin value set in the HitObject + HitObject_SetShaderTableIndex = + 287, // Returns a HitObject with updated shader table index + HitObject_ShaderTableIndex = + 286, // Returns the shader table index set for this HitObject + HitObject_WorldRayDirection = 276, // Returns the ray direction in world space + HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space + HitObject_WorldToObject3x4 = 280, // Returns the world to object space + // transformation matrix in 3x4 form // 
Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands @@ -1297,10 +1305,16 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackLevel, // Shader Execution Reordering + HitObject_Attributes, HitObject_FromRayQuery, HitObject_FromRayQueryWithAttrs, + HitObject_LoadLocalRootTableConstant, HitObject_MakeMiss, HitObject_MakeNop, + HitObject_SetShaderTableIndex, + HitObject_StateMatrix, + HitObject_StateScalar, + HitObject_StateVector, // Synchronization AtomicBinOp, @@ -1366,7 +1380,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 181 // exclusive last value of enumeration + NumOpClasses = 187 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 15f7a1362b..2655124c2d 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8987,6 +8987,691 @@ struct DxilInst_HitObject_MakeNop { bool requiresUniformInputs() const { return false; } }; +/// This instruction Returns `true` if the HitObject represents a miss +struct DxilInst_HitObject_IsMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject is a NOP-HitObject +struct DxilInst_HitObject_IsHit { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsHit(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsHit); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject represents a nop +struct DxilInst_HitObject_IsNop { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return 
Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray flags set in the HitObject +struct DxilInst_HitObject_RayFlags { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayFlags(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayFlags); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the TMin value set in the HitObject +struct DxilInst_HitObject_RayTMin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTMin(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayTMin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the current T value set in the HitObject +struct DxilInst_HitObject_RayTCurrent { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTCurrent(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_RayTCurrent); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray origin in world space +struct DxilInst_HitObject_WorldRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + 
llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in world space +struct DxilInst_HitObject_WorldRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayDirection); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray origin in object space +struct DxilInst_HitObject_ObjectRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in object space +struct DxilInst_HitObject_ObjectRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayDirection); + } + // Validation support 
+ bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the object to world space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_ObjectToWorld3x4 { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectToWorld3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectToWorld3x4); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the world to object space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_WorldToObject3x4 { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldToObject3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldToObject3x4); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return 
Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the geometry index committed on hit +struct DxilInst_HitObject_GeometryIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_GeometryIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_GeometryIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance index committed on hit +struct DxilInst_HitObject_InstanceIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance id committed on hit +struct DxilInst_HitObject_InstanceID { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceID); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This 
instruction Returns the primitive index committed on hit +struct DxilInst_HitObject_PrimitiveIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_PrimitiveIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_PrimitiveIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the HitKind of the hit +struct DxilInst_HitObject_HitKind { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_HitKind(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_HitKind); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the shader table index set for this HitObject +struct DxilInst_HitObject_ShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns a HitObject with updated shader table index +struct DxilInst_HitObject_SetShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_SetShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_SetShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_shaderTableIndex = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_shaderTableIndex() const 
{ return Instr->getOperand(2); } + void set_shaderTableIndex(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the root table constant for this HitObject and +/// offset +struct DxilInst_HitObject_LoadLocalRootTableConstant { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_LoadLocalRootTableConstant(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_LoadLocalRootTableConstant); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_offset = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_offset() const { return Instr->getOperand(2); } + void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the attributes set for this HitObject +struct DxilInst_HitObject_Attributes { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Attributes(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_Attributes); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_attributes = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_attributes() const { return Instr->getOperand(2); } + void set_attributes(llvm::Value *val) { Instr->setOperand(2, val); } +}; + /// This instruction reads from a raw buffer and structured buffer struct DxilInst_RawBufferVectorLoad { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 7945197eba..d9276fc7d6 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2362,174 +2362,177 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - {OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB13, - "ReservedB13", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB14, - "ReservedB14", - 
OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB16, - "ReservedB16", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - Attribute::None, + + // Shader Execution Reordering + {OC::HitObject_IsMiss, + "HitObject_IsMiss", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::HitObject_IsHit, + "HitObject_IsHit", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::HitObject_IsNop, + "HitObject_IsNop", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::HitObject_RayFlags, + "HitObject_RayFlags", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_RayTMin, + "HitObject_RayTMin", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_RayTCurrent, + "HitObject_RayTCurrent", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_WorldRayOrigin, + "HitObject_WorldRayOrigin", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_WorldRayDirection, + "HitObject_WorldRayDirection", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_ObjectRayOrigin, + "HitObject_ObjectRayOrigin", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_ObjectRayDirection, + "HitObject_ObjectRayDirection", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_ObjectToWorld3x4, + "HitObject_ObjectToWorld3x4", + OCC::HitObject_StateMatrix, + "hitObject_StateMatrix", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + 
{OC::HitObject_WorldToObject3x4, + "HitObject_WorldToObject3x4", + OCC::HitObject_StateMatrix, + "hitObject_StateMatrix", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_GeometryIndex, + "HitObject_GeometryIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_InstanceIndex, + "HitObject_InstanceIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_InstanceID, + "HitObject_InstanceID", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_PrimitiveIndex, + "HitObject_PrimitiveIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_HitKind, + "HitObject_HitKind", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_ShaderTableIndex, + "HitObject_ShaderTableIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_SetShaderTableIndex, + "HitObject_SetShaderTableIndex", + OCC::HitObject_SetShaderTableIndex, + "hitObject_SetShaderTableIndex", + Attribute::ReadNone, 0, {}, {}}, // Overloads: v - {OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - Attribute::None, + {OC::HitObject_LoadLocalRootTableConstant, + "HitObject_LoadLocalRootTableConstant", + OCC::HitObject_LoadLocalRootTableConstant, + "hitObject_LoadLocalRootTableConstant", + Attribute::ReadOnly, 0, {}, {}}, // Overloads: v + {OC::HitObject_Attributes, + "HitObject_Attributes", + OCC::HitObject_Attributes, + "hitObject_Attributes", + Attribute::ArgMemOnly, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::ReservedB28, "ReservedB28", OCC::Reserved, @@ -3448,8 +3451,17 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, } // Instructions: HitObject_FromRayQuery=263, // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, - // HitObject_MakeNop=266 - if ((263 <= op && op <= 266)) { + // HitObject_MakeNop=266, HitObject_IsMiss=269, HitObject_IsHit=270, + // HitObject_IsNop=271, HitObject_RayFlags=272, HitObject_RayTMin=273, + // HitObject_RayTCurrent=274, HitObject_WorldRayOrigin=275, + // HitObject_WorldRayDirection=276, HitObject_ObjectRayOrigin=277, + // HitObject_ObjectRayDirection=278, HitObject_ObjectToWorld3x4=279, + // HitObject_WorldToObject3x4=280, HitObject_GeometryIndex=281, + // HitObject_InstanceIndex=282, HitObject_InstanceID=283, + // HitObject_PrimitiveIndex=284, HitObject_HitKind=285, + // HitObject_ShaderTableIndex=286, HitObject_SetShaderTableIndex=287, + // HitObject_LoadLocalRootTableConstant=288, HitObject_Attributes=289 + if ((263 <= op && op <= 266) || (269 <= op && op <= 289)) { major = 6; minor = 9; mask = @@ -5666,90 +5678,126 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB7: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_IsMiss: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB8: - A(pV); + case OpCode::HitObject_IsHit: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB9: - A(pV); + case OpCode::HitObject_IsNop: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB10: - 
A(pV); + case OpCode::HitObject_RayFlags: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB11: - A(pV); + case OpCode::HitObject_RayTMin: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB12: - A(pV); + case OpCode::HitObject_RayTCurrent: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB13: - A(pV); + case OpCode::HitObject_WorldRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB14: - A(pV); + case OpCode::HitObject_WorldRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB15: - A(pV); + case OpCode::HitObject_ObjectRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB16: - A(pV); + case OpCode::HitObject_ObjectRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB17: - A(pV); + case OpCode::HitObject_ObjectToWorld3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB18: - A(pV); + case OpCode::HitObject_WorldToObject3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB19: - A(pV); + case OpCode::HitObject_GeometryIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB20: - A(pV); + case OpCode::HitObject_InstanceIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB21: - A(pV); + case OpCode::HitObject_InstanceID: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB22: - A(pV); + case OpCode::HitObject_PrimitiveIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB23: - A(pV); + case OpCode::HitObject_HitKind: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB24: - A(pV); + case OpCode::HitObject_ShaderTableIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB25: - A(pV); + case OpCode::HitObject_SetShaderTableIndex: + A(pHit); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB26: - A(pV); + case OpCode::HitObject_LoadLocalRootTableConstant: + A(pI32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB27: + case OpCode::HitObject_Attributes: A(pV); A(pI32); + A(pHit); + A(udt); break; + + // case OpCode::ReservedB28: A(pV); A(pI32); @@ -5959,6 +6007,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TempRegStore: case OpCode::CallShader: case OpCode::Pack4x8: + case OpCode::HitObject_Attributes: if (FT->getNumParams() <= 2) return nullptr; return FT->getParamType(2); @@ -6092,27 +6141,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::HitObject_MakeNop: case OpCode::ReservedB5: case OpCode::ReservedB6: - case OpCode::ReservedB7: - case OpCode::ReservedB8: - case OpCode::ReservedB9: - case OpCode::ReservedB10: - case OpCode::ReservedB11: - case OpCode::ReservedB12: - case OpCode::ReservedB13: - case OpCode::ReservedB14: - case OpCode::ReservedB15: - case OpCode::ReservedB16: - case OpCode::ReservedB17: - case OpCode::ReservedB18: - case OpCode::ReservedB19: - case OpCode::ReservedB20: - case OpCode::ReservedB21: - case OpCode::ReservedB22: - case OpCode::ReservedB23: - case OpCode::ReservedB24: - case OpCode::ReservedB25: - case OpCode::ReservedB26: - case OpCode::ReservedB27: + case OpCode::HitObject_SetShaderTableIndex: + case OpCode::HitObject_LoadLocalRootTableConstant: case OpCode::ReservedB28: case OpCode::ReservedB29: case OpCode::ReservedB30: @@ -6164,6 +6194,13 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case 
OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex: case OpCode::StartVertexLocation: case OpCode::StartInstanceLocation: + case OpCode::HitObject_RayFlags: + case OpCode::HitObject_GeometryIndex: + case OpCode::HitObject_InstanceIndex: + case OpCode::HitObject_InstanceID: + case OpCode::HitObject_PrimitiveIndex: + case OpCode::HitObject_HitKind: + case OpCode::HitObject_ShaderTableIndex: return IntegerType::get(Ctx, 32); case OpCode::CalculateLOD: case OpCode::DomainLocation: @@ -6190,6 +6227,14 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CandidateObjectRayDirection: case OpCode::RayQuery_CommittedObjectRayOrigin: case OpCode::RayQuery_CommittedObjectRayDirection: + case OpCode::HitObject_RayTMin: + case OpCode::HitObject_RayTCurrent: + case OpCode::HitObject_WorldRayOrigin: + case OpCode::HitObject_WorldRayDirection: + case OpCode::HitObject_ObjectRayOrigin: + case OpCode::HitObject_ObjectRayDirection: + case OpCode::HitObject_ObjectToWorld3x4: + case OpCode::HitObject_WorldToObject3x4: return Type::getFloatTy(Ctx); case OpCode::MakeDouble: case OpCode::SplitDouble: @@ -6200,6 +6245,9 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CommittedTriangleFrontFace: case OpCode::IsHelperLane: case OpCode::QuadVote: + case OpCode::HitObject_IsMiss: + case OpCode::HitObject_IsHit: + case OpCode::HitObject_IsNop: return IntegerType::get(Ctx, 1); case OpCode::CBufferLoadLegacy: case OpCode::Sample: diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll new file mode 100644 index 0000000000..e527125009 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll @@ -0,0 +1,110 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. 
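+; The state accessors exercised below share three overloaded call shapes
+; (the overload suffix .i1/.i32/.f32 names the scalar return type):
+;   scalar: call <ty>  @dx.op.hitObject_StateScalar.<ty>(i32 opcode, %dx.types.HitObject hit)
+;   vector: call float @dx.op.hitObject_StateVector.f32(i32 opcode, %dx.types.HitObject hit, i32 component)
+;   matrix: call float @dx.op.hitObject_StateMatrix.f32(i32 opcode, %dx.types.HitObject hit, i32 row, i32 col)
+; HitObject_SetShaderTableIndex, HitObject_LoadLocalRootTableConstant, and
+; HitObject_Attributes use the dedicated signatures declared at the end of
+; this file.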
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.AttribType = type { float, float } +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %attrs = alloca %struct.AttribType, align 4 + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + %r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %nop) ; HitObject_IsMiss(hitObject) + + %r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %nop) ; HitObject_IsHit(hitObject) + + %r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %nop) ; HitObject_IsNop(hitObject) + + %r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject %nop) ; HitObject_RayFlags(hitObject) + + %r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject %nop) ; HitObject_RayTMin(hitObject) + + %r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject %nop) ; HitObject_RayTCurrent(hitObject) + + %r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayOrigin(hitObject,component) + + %r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayDirection(hitObject,component) + + %r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) + + %r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayDirection(hitObject,component) + + %r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + + %r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + + %r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %nop) ; HitObject_GeometryIndex(hitObject) + + %r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %nop) ; HitObject_InstanceIndex(hitObject) + + %r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %nop) ; HitObject_InstanceID(hitObject) + + %r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %nop) ; HitObject_PrimitiveIndex(hitObject) + + %r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %nop) ; HitObject_HitKind(hitObject) + + %r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %nop) ; HitObject_ShaderTableIndex(hitObject) + + %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %nop, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + + call void @dx.op.hitObject_Attributes.struct.AttribType(i32 289, %dx.types.HitObject %nop, %struct.AttribType* nonnull %attrs) ; HitObject_Attributes(hitObject,attributes) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32, %dx.types.HitObject, 
i32) #1 + +; Function Attrs: nounwind readnone +declare i1 @dx.op.hitObject_StateScalar.i1(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare i32 @dx.op.hitObject_StateScalar.i32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readonly +declare i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32, %dx.types.HitObject, i32) #2 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateVector.f32(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind argmemonly +declare void @dx.op.hitObject_Attributes.struct.AttribType(i32, %dx.types.HitObject, %struct.AttribType*) #3 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateScalar.f32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateMatrix.f32(i32, %dx.types.HitObject, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind argmemonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!3, !4} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !5} +!3 = !{null, !"", null, null, !6} +!4 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !7} +!5 = !{!8} +!6 = !{i32 0, i64 0} +!7 = !{i32 8, i32 7, i32 5, !9} +!8 = !{i32 1, !10, !10} +!9 = !{i32 0} +!10 = !{} + diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index b3b9c82528..28695a4036 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -851,6 +851,11 @@ def populate_categories_and_models(self): for i in ( "HitObject_MakeMiss,HitObject_MakeNop" + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + + ",HitObject_IsMiss,HitObject_IsHit,HitObject_IsNop" + + ",HitObject_RayFlags,HitObject_RayTMin,HitObject_RayTCurrent,HitObject_GeometryIndex,HitObject_InstanceIndex,HitObject_InstanceID,HitObject_PrimitiveIndex,HitObject_HitKind,HitObject_ShaderTableIndex" + + ",HitObject_WorldRayOrigin,HitObject_WorldRayDirection,HitObject_ObjectRayOrigin,HitObject_ObjectRayDirection" + + ",HitObject_ObjectToWorld3x4,HitObject_WorldToObject3x4" + + ",HitObject_SetShaderTableIndex,HitObject_LoadLocalRootTableConstant,HitObject_Attributes" ).split(","): self.name_idx[i].category = "Shader Execution Reordering" self.name_idx[i].shader_model = 6, 9 @@ -5817,7 +5822,342 @@ def UFI(name, **mappings): ) next_op_idx += 1 - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 26, 5) + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 2, 5) + + self.add_dxil_op( + "HitObject_IsMiss", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a miss", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_IsHit", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject is a NOP-HitObject", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_IsNop", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a nop", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( 
+ "HitObject_RayFlags", + next_op_idx, + "HitObject_StateScalar", + "Returns the ray flags set in the HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTMin", + next_op_idx, + "HitObject_StateScalar", + "Returns the TMin value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTCurrent", + next_op_idx, + "HitObject_StateScalar", + "Returns the current T value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectToWorld3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the object to world space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], , relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldToObject3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the world to object space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_GeometryIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the geometry index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, 
"i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceID", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance id committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_PrimitiveIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the primitive index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_HitKind", + next_op_idx, + "HitObject_StateScalar", + "Returns the HitKind of the hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ShaderTableIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the shader table index set for this HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_SetShaderTableIndex", + next_op_idx, + "HitObject_SetShaderTableIndex", + "Returns a HitObject with updated shader table index", + "v", + "rn", + [ + db_dxil_param( + 0, "hit_object", "hitObject", "hit with shader table index set" + ), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "shaderTableIndex", "shader table index"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_LoadLocalRootTableConstant", + next_op_idx, + "HitObject_LoadLocalRootTableConstant", + "Returns the root table constant for this HitObject and offset", + "v", + "ro", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "offset", "offset"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_Attributes", + next_op_idx, + "HitObject_Attributes", + "Returns the attributes set for this HitObject", + "u", + "amo", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, "udt", "attributes", "pointer to store the attributes to" + ), + ], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3, 28) # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) From 8280d0fb4104ce9c67af3b9f4a1335760c7c5113 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Mon, 14 Apr 2025 18:36:35 +0200 Subject: [PATCH 80/88] [SER] HitObject_Invoke|TraceRay DXIL opcodes and check-pass test (#7278) Add the DXIL operations and a passing validation test for: - HitObject_TraceRay - HitObject_Invoke DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 10 +- include/dxc/DXIL/DxilInstructions.h | 117 ++++++++++++++++++ lib/DXIL/DxilOperations.cpp | 90 ++++++++------ .../DxilShaderAccessTracking.cpp | 7 +- lib/DxilValidation/DxilValidation.cpp | 23 ++-- .../ser_hitobject_traceinvoke_passing.ll | 68 ++++++++++ 
utils/hct/hctdb.py | 86 ++++++++++++- 7 files changed, 349 insertions(+), 52 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 723abe552f..e002779d09 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -502,11 +502,9 @@ enum class OpCode : unsigned { ReservedA0 = 259, // reserved ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved - ReservedB0 = 262, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved ReservedB30 = 292, // reserved - ReservedB5 = 267, // reserved ReservedB6 = 268, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved @@ -903,6 +901,8 @@ enum class OpCode : unsigned { HitObject_HitKind = 285, // Returns the HitKind of the hit HitObject_InstanceID = 283, // Returns the instance id committed on hit HitObject_InstanceIndex = 282, // Returns the instance index committed on hit + HitObject_Invoke = 267, // Represents the invocation of the CH/MS shader + // represented by the HitObject HitObject_IsHit = 270, // Returns `true` if the HitObject is a NOP-HitObject HitObject_IsMiss = 269, // Returns `true` if the HitObject represents a miss HitObject_IsNop = 271, // Returns `true` if the HitObject represents a nop @@ -925,6 +925,8 @@ enum class OpCode : unsigned { 287, // Returns a HitObject with updated shader table index HitObject_ShaderTableIndex = 286, // Returns the shader table index set for this HitObject + HitObject_TraceRay = 262, // Analogous to TraceRay but without invoking CH/MS + // and returns the intermediate state as a HitObject HitObject_WorldRayDirection = 276, // Returns the ray direction in world space HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space HitObject_WorldToObject3x4 = 280, // Returns the world to object space @@ -1308,6 +1310,7 @@ enum class OpCodeClass : unsigned { HitObject_Attributes, HitObject_FromRayQuery, HitObject_FromRayQueryWithAttrs, + HitObject_Invoke, HitObject_LoadLocalRootTableConstant, HitObject_MakeMiss, HitObject_MakeNop, @@ -1315,6 +1318,7 @@ enum class OpCodeClass : unsigned { HitObject_StateMatrix, HitObject_StateScalar, HitObject_StateVector, + HitObject_TraceRay, // Synchronization AtomicBinOp, @@ -1380,7 +1384,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 187 // exclusive last value of enumeration + NumOpClasses = 189 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 2655124c2d..e39f754c68 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8850,6 +8850,92 @@ struct DxilInst_AllocateRayQuery2 { } }; +/// This instruction Analogous to TraceRay but without invoking CH/MS and +/// returns the intermediate state as a HitObject +struct DxilInst_HitObject_TraceRay { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_TraceRay(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_TraceRay); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (16 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return 
false; } + // Operand indexes + enum OperandIdx { + arg_accelerationStructure = 1, + arg_rayFlags = 2, + arg_instanceInclusionMask = 3, + arg_rayContributionToHitGroupIndex = 4, + arg_multiplierForGeometryContributionToHitGroupIndex = 5, + arg_missShaderIndex = 6, + arg_Origin_X = 7, + arg_Origin_Y = 8, + arg_Origin_Z = 9, + arg_TMin = 10, + arg_Direction_X = 11, + arg_Direction_Y = 12, + arg_Direction_Z = 13, + arg_TMax = 14, + arg_payload = 15, + }; + // Accessors + llvm::Value *get_accelerationStructure() const { + return Instr->getOperand(1); + } + void set_accelerationStructure(llvm::Value *val) { + Instr->setOperand(1, val); + } + llvm::Value *get_rayFlags() const { return Instr->getOperand(2); } + void set_rayFlags(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_instanceInclusionMask() const { + return Instr->getOperand(3); + } + void set_instanceInclusionMask(llvm::Value *val) { + Instr->setOperand(3, val); + } + llvm::Value *get_rayContributionToHitGroupIndex() const { + return Instr->getOperand(4); + } + void set_rayContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(4, val); + } + llvm::Value *get_multiplierForGeometryContributionToHitGroupIndex() const { + return Instr->getOperand(5); + } + void set_multiplierForGeometryContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(5, val); + } + llvm::Value *get_missShaderIndex() const { return Instr->getOperand(6); } + void set_missShaderIndex(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(7); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(8); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(9); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(10); } + void set_TMin(llvm::Value *val) { Instr->setOperand(10, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(11); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(12); } + void set_Direction_Y(llvm::Value *val) { Instr->setOperand(12, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(13); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(13, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(14); } + void set_TMax(llvm::Value *val) { Instr->setOperand(14, val); } + llvm::Value *get_payload() const { return Instr->getOperand(15); } + void set_payload(llvm::Value *val) { Instr->setOperand(15, val); } +}; + /// This instruction Creates a new HitObject representing a committed hit from a /// RayQuery struct DxilInst_HitObject_FromRayQuery { @@ -8987,6 +9073,37 @@ struct DxilInst_HitObject_MakeNop { bool requiresUniformInputs() const { return false; } }; +/// This instruction Represents the invocation of the CH/MS shader represented +/// by the HitObject +struct DxilInst_HitObject_Invoke { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Invoke(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_Invoke); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != 
llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_payload = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_payload() const { return Instr->getOperand(2); } + void set_payload(llvm::Value *val) { Instr->setOperand(2, val); } +}; + /// This instruction Returns `true` if the HitObject represents a miss struct DxilInst_HitObject_IsMiss { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index d9276fc7d6..b837d6e65d 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2303,16 +2303,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - {OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v // Shader Execution Reordering + {OC::HitObject_TraceRay, + "HitObject_TraceRay", + OCC::HitObject_TraceRay, + "hitObject_TraceRay", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u {OC::HitObject_FromRayQuery, "HitObject_FromRayQuery", OCC::HitObject_FromRayQuery, @@ -2345,15 +2345,15 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - - {OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", + {OC::HitObject_Invoke, + "HitObject_Invoke", + OCC::HitObject_Invoke, + "hitObject_Invoke", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::ReservedB6, "ReservedB6", OCC::Reserved, @@ -3449,19 +3449,20 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } - // Instructions: HitObject_FromRayQuery=263, + // Instructions: HitObject_TraceRay=262, HitObject_FromRayQuery=263, // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, - // HitObject_MakeNop=266, HitObject_IsMiss=269, HitObject_IsHit=270, - // HitObject_IsNop=271, HitObject_RayFlags=272, HitObject_RayTMin=273, - // HitObject_RayTCurrent=274, HitObject_WorldRayOrigin=275, - // HitObject_WorldRayDirection=276, HitObject_ObjectRayOrigin=277, - // HitObject_ObjectRayDirection=278, HitObject_ObjectToWorld3x4=279, - // HitObject_WorldToObject3x4=280, HitObject_GeometryIndex=281, - // HitObject_InstanceIndex=282, HitObject_InstanceID=283, - // HitObject_PrimitiveIndex=284, HitObject_HitKind=285, - // HitObject_ShaderTableIndex=286, HitObject_SetShaderTableIndex=287, + // HitObject_MakeNop=266, HitObject_Invoke=267, HitObject_IsMiss=269, + // HitObject_IsHit=270, HitObject_IsNop=271, HitObject_RayFlags=272, + // HitObject_RayTMin=273, HitObject_RayTCurrent=274, + // HitObject_WorldRayOrigin=275, HitObject_WorldRayDirection=276, + // HitObject_ObjectRayOrigin=277, HitObject_ObjectRayDirection=278, + // HitObject_ObjectToWorld3x4=279, HitObject_WorldToObject3x4=280, + // HitObject_GeometryIndex=281, HitObject_InstanceIndex=282, + // HitObject_InstanceID=283, HitObject_PrimitiveIndex=284, + // HitObject_HitKind=285, HitObject_ShaderTableIndex=286, + // HitObject_SetShaderTableIndex=287, // HitObject_LoadLocalRootTableConstant=288, HitObject_Attributes=289 - if ((263 <= op && op <= 266) || (269 <= op && op <= 289)) { + if ((262 <= op && op <= 267) || (269 <= op && op <= 289)) { major = 6; minor = 9; 
mask = @@ -5632,12 +5633,27 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB0: - A(pV); - A(pI32); - break; // Shader Execution Reordering + case OpCode::HitObject_TraceRay: + A(pHit); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(udt); + break; case OpCode::HitObject_FromRayQuery: A(pHit); A(pI32); @@ -5668,12 +5684,14 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pHit); A(pI32); break; - - // - case OpCode::ReservedB5: + case OpCode::HitObject_Invoke: A(pV); A(pI32); + A(pHit); + A(udt); break; + + // case OpCode::ReservedB6: A(pV); A(pI32); @@ -6007,6 +6025,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TempRegStore: case OpCode::CallShader: case OpCode::Pack4x8: + case OpCode::HitObject_Invoke: case OpCode::HitObject_Attributes: if (FT->getNumParams() <= 2) return nullptr; @@ -6048,6 +6067,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return nullptr; return FT->getParamType(5); case OpCode::TraceRay: + case OpCode::HitObject_TraceRay: if (FT->getNumParams() <= 15) return nullptr; return FT->getParamType(15); @@ -6135,11 +6155,9 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ReservedA0: case OpCode::ReservedA1: case OpCode::ReservedA2: - case OpCode::ReservedB0: case OpCode::HitObject_FromRayQuery: case OpCode::HitObject_MakeMiss: case OpCode::HitObject_MakeNop: - case OpCode::ReservedB5: case OpCode::ReservedB6: case OpCode::HitObject_SetShaderTableIndex: case OpCode::HitObject_LoadLocalRootTableConstant: diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index bd96d83965..1dddb6c0e6 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -905,13 +905,14 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { case DXIL::OpCode::BufferUpdateCounter: readWrite = ShaderAccessFlags::Counter; break; + case DXIL::OpCode::HitObject_TraceRay: case DXIL::OpCode::TraceRay: { // Read of AccelerationStructure; doesn't match function attribute - auto res = GetResourceFromHandle(Call->getArgOperand(1), DM); - if (res.accessStyle == AccessStyle::None) { + auto Res = GetResourceFromHandle(Call->getArgOperand(1), DM); + if (Res.accessStyle == AccessStyle::None) { continue; } - if (EmitResourceAccess(DM, res, Call, HlslOP, Ctx, + if (EmitResourceAccess(DM, Res, Call, HlslOP, Ctx, ShaderAccessFlags::Read)) { Modified = true; } diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index aa7bb398fa..5ec72e0267 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1006,6 +1006,15 @@ static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, return true; } +static void ValidateASHandle(CallInst *CI, Value *Hdl, + ValidationContext &ValCtx) { + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); + if (RP.getResourceClass() == DXIL::ResourceClass::Invalid || + RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); + } +} + static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { switch (Opcode) { @@ -1587,14 +1596,12 @@ static void 
ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, case DXIL::OpCode::TraceRay: { DxilInst_TraceRay TraceRay(CI); Value *Hdl = TraceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); - if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - return; - } - if (RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - } + ValidateASHandle(CI, Hdl, ValCtx); + } break; + case DXIL::OpCode::HitObject_TraceRay: { + DxilInst_HitObject_TraceRay HOTraceRay(CI); + Value *Hdl = HOTraceRay.get_accelerationStructure(); + ValidateASHandle(CI, Hdl, ValCtx); } break; default: break; diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll new file mode 100644 index 0000000000..f3b99300be --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll @@ -0,0 +1,68 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.Payload, align 4 + %3 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + %5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %5, %struct.Payload* nonnull %2) ; HitObject_Invoke(hitObject,payload) + ret void +} + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind +declare void @dx.op.hitObject_Invoke.struct.Payload(i32, %dx.types.HitObject, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, 
%dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.dxrPayloadAnnotations = !{!4} +!dx.entryPoints = !{!5, !6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!7, null, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !8} +!4 = !{i32 0, %struct.Payload undef, !9} +!5 = !{null, !"", null, !2, null} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !10} +!7 = !{!11} +!8 = !{!12} +!9 = !{!13} +!10 = !{i32 8, i32 7, i32 5, !14} +!11 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !15} +!12 = !{i32 1, !16, !16} +!13 = !{i32 0, i32 8210} +!14 = !{i32 0} +!15 = !{i32 0, i32 4} +!16 = !{} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 28695a4036..595bad7c1b 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -850,6 +850,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 8 for i in ( "HitObject_MakeMiss,HitObject_MakeNop" + + ",HitObject_TraceRay,HitObject_Invoke" + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + ",HitObject_IsMiss,HitObject_IsHit,HitObject_IsNop" + ",HitObject_RayFlags,HitObject_RayTMin,HitObject_RayTCurrent,HitObject_GeometryIndex,HitObject_InstanceIndex,HitObject_InstanceID,HitObject_PrimitiveIndex,HitObject_HitKind,HitObject_ShaderTableIndex" @@ -5747,7 +5748,68 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1) + self.add_dxil_op( + "HitObject_TraceRay", + next_op_idx, + "HitObject_TraceRay", + "Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject", + "u", + "", + [ + db_dxil_param(0, "hit_object", "", "Resulting HitObject"), + db_dxil_param( + 2, + "res", + "accelerationStructure", + "Top-level acceleration structure to use", + ), + db_dxil_param( + 3, + "i32", + "rayFlags", + "Valid combination of Ray_flags", + ), + db_dxil_param( + 4, + "i32", + "instanceInclusionMask", + "Bottom 8 bits of InstanceInclusionMask are used to include/reject geometry instances based on the InstanceMask in each instance: if(!((InstanceInclusionMask & InstanceMask) & 0xff)) { ignore intersection }", + ), + db_dxil_param( + 5, + "i32", + "rayContributionToHitGroupIndex", + "Offset to add into Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 6, + "i32", + "multiplierForGeometryContributionToHitGroupIndex", + "Stride to multiply by per-geometry GeometryContributionToHitGroupIndex in Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 7, + "i32", + "missShaderIndex", + "Miss shader index in Addressing calculations within shader tables. 
Only the bottom 16 bits of this value are used", + ), + db_dxil_param(8, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(9, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(10, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(11, "f", "TMin", "Tmin of the ray"), + db_dxil_param(12, "f", "Direction_X", "Direction x of the ray"), + db_dxil_param(13, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(14, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(15, "f", "TMax", "Tmax of the ray"), + db_dxil_param( + 16, + "udt", + "payload", + "User-defined payload structure", + ), + ], + ) + next_op_idx += 1 self.add_dxil_op( "HitObject_FromRayQuery", @@ -5822,7 +5884,27 @@ def UFI(name, **mappings): ) next_op_idx += 1 - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 2, 5) + self.add_dxil_op( + "HitObject_Invoke", + next_op_idx, + "HitObject_Invoke", + "Represents the invocation of the CH/MS shader represented by the HitObject", + "u", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "udt", + "payload", + "User-defined payload structure", + ), + ], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1, 6) self.add_dxil_op( "HitObject_IsMiss", From b5a9cd59df273cd684b5db82acec88e90f87893b Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Mon, 14 Apr 2025 23:04:15 +0200 Subject: [PATCH 81/88] [SER] MaybeReorderThread DXIL opcode and validation (#7256) - DXIL opcodes for MaybeReorderThread - Validator rules - DXV validation test (passing & expected failures) Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 5 +- include/dxc/DXIL/DxilInstructions.h | 37 ++++++++++++ lib/DXIL/DxilOperations.cpp | 29 +++++---- lib/DxilValidation/DxilValidation.cpp | 24 ++++++++ .../ser_maybereorder_failing.ll | 60 +++++++++++++++++++ .../ser_maybereorder_passing.ll | 46 ++++++++++++++ utils/hct/hctdb.py | 38 +++++++++++- 7 files changed, 223 insertions(+), 16 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll create mode 100644 tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index e002779d09..8c73328fbd 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -505,7 +505,6 @@ enum class OpCode : unsigned { ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved ReservedB30 = 292, // reserved - ReservedB6 = 268, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved ReservedC2 = 295, // reserved @@ -931,6 +930,7 @@ enum class OpCode : unsigned { HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space HitObject_WorldToObject3x4 = 280, // Returns the world to object space // transformation matrix in 3x4 form + MaybeReorderThread = 268, // Reorders the current thread // Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands @@ -1319,6 +1319,7 @@ enum class OpCodeClass : unsigned { HitObject_StateScalar, HitObject_StateVector, HitObject_TraceRay, + MaybeReorderThread, // Synchronization AtomicBinOp, @@ -1384,7 +1385,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 189 // exclusive last value of enumeration + NumOpClasses = 190 // 
exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index e39f754c68..a99c5360d4 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -9104,6 +9104,43 @@ struct DxilInst_HitObject_Invoke { void set_payload(llvm::Value *val) { Instr->setOperand(2, val); } }; +/// This instruction Reorders the current thread +struct DxilInst_MaybeReorderThread { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MaybeReorderThread(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::MaybeReorderThread); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_coherenceHint = 2, + arg_numCoherenceHintBitsFromLSB = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_coherenceHint() const { return Instr->getOperand(2); } + void set_coherenceHint(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_numCoherenceHintBitsFromLSB() const { + return Instr->getOperand(3); + } + void set_numCoherenceHintBitsFromLSB(llvm::Value *val) { + Instr->setOperand(3, val); + } +}; + /// This instruction Returns `true` if the HitObject represents a miss struct DxilInst_HitObject_IsMiss { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b837d6e65d..f614ba9d14 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2353,17 +2353,14 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 1, {{0x100}}, {{0x0}}}, // Overloads: u - - {OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", + {OC::MaybeReorderThread, + "MaybeReorderThread", + OCC::MaybeReorderThread, + "maybeReorderThread", Attribute::None, 0, {}, {}}, // Overloads: v - - // Shader Execution Reordering {OC::HitObject_IsMiss, "HitObject_IsMiss", OCC::HitObject_StateScalar, @@ -3449,6 +3446,13 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } + // Instructions: MaybeReorderThread=268 + if (op == 268) { + major = 6; + minor = 9; + mask = SFLAG(Library) | SFLAG(RayGeneration); + return; + } // Instructions: HitObject_TraceRay=262, HitObject_FromRayQuery=263, // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, // HitObject_MakeNop=266, HitObject_Invoke=267, HitObject_IsMiss=269, @@ -5690,14 +5694,13 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pHit); A(udt); break; - - // - case OpCode::ReservedB6: + case OpCode::MaybeReorderThread: A(pV); A(pI32); + A(pHit); + A(pI32); + A(pI32); break; - - // Shader Execution Reordering case OpCode::HitObject_IsMiss: A(pI1); A(pI32); @@ -6158,7 +6161,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::HitObject_FromRayQuery: case OpCode::HitObject_MakeMiss: case OpCode::HitObject_MakeNop: - case OpCode::ReservedB6: + case OpCode::MaybeReorderThread: case OpCode::HitObject_SetShaderTableIndex: case OpCode::HitObject_LoadLocalRootTableConstant: case 
OpCode::ReservedB28: diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 5ec72e0267..00a6b9ae14 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1886,6 +1886,30 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, {"CreateHandleForLib", "Library"}); } break; + + // Shader Execution Reordering + case DXIL::OpCode::MaybeReorderThread: { + Value *HitObject = CI->getArgOperand(1); + Value *CoherenceHintBits = CI->getArgOperand(2); + Value *NumCoherenceHintBits = CI->getArgOperand(3); + + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + + if (isa(NumCoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + + ConstantInt *NumCoherenceHintBitsConst = + dyn_cast(NumCoherenceHintBits); + const bool HasCoherenceHint = + NumCoherenceHintBitsConst && + NumCoherenceHintBitsConst->getLimitedValue() != 0; + if (HasCoherenceHint && isa(CoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + } break; + case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll new file mode 100644 index 0000000000..4502b9241d --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll @@ -0,0 +1,60 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef)' + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1)' + +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0)' + +; CHECK: Validation failed. + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ; Validate that hit object is not undef. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that coherence hint is not undef while numCoherenceHintBitsFromLSB is not 0. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that num coherence hint bits from LSB is not undef. 
+ call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll new file mode 100644 index 0000000000..8ee7677bd4 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll @@ -0,0 +1,46 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Coherence hint disabled, accept 'undef' coherence hint bits. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 595bad7c1b..9b2f33727a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -866,6 +866,13 @@ def populate_categories_and_models(self): "closesthit", "miss", ) + for i in ("MaybeReorderThread").split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + ) def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. 
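At the source level, the hitObject/coherenceHint/numCoherenceHintBitsFromLSB operands defined earlier in this patch correspond to the MaybeReorderThread overloads described in the linked proposal 0027. A minimal raygeneration-stage sketch, assuming the dx::MaybeReorderThread and dx::HitObject::MakeNop spellings from that proposal (the hit object and hint values are purely illustrative):

    dx::HitObject Hit = dx::HitObject::MakeNop();
    uint CoherenceHint = 3; // illustrative value

    // Reorder on the hit object alone.
    dx::MaybeReorderThread(Hit);

    // Reorder on the hit object plus a user coherence hint. The last argument
    // gives the number of meaningful low bits of the hint, so it must not be
    // undef, and the hint itself must be defined whenever that count is
    // nonzero; these are exactly the two cases the new validation rules reject.
    dx::MaybeReorderThread(Hit, CoherenceHint, 2);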
@@ -5904,7 +5911,26 @@ def UFI(name, **mappings): ) next_op_idx += 1 - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1, 6) + self.add_dxil_op( + "MaybeReorderThread", + next_op_idx, + "MaybeReorderThread", + "Reorders the current thread", + "v", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "coherenceHint", "Coherence hint"), + db_dxil_param( + 4, + "i32", + "numCoherenceHintBitsFromLSB", + "Num coherence hint bits from LSB", + ), + ], + ) + next_op_idx += 1 self.add_dxil_op( "HitObject_IsMiss", @@ -8267,6 +8293,16 @@ def build_valrules(self): "Invalid use of completed record handle.", ) + # Shader Execution Reordering + self.add_valrule( + "Instr.UndefHitObject", + "HitObject is undef.", + ) + self.add_valrule( + "Instr.MayReorderThreadUndefCoherenceHintParam", + "Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.", + ) + # Some legacy rules: # - space is only supported for shader targets 5.1 and higher # - multiple rules regarding derivatives, which isn't a supported feature for DXIL From 47e11af022d4ed41ac87348f822ea8804b55523a Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Mon, 14 Apr 2025 15:20:13 -0600 Subject: [PATCH 82/88] [spirv] Handles rvalue as implicit object argument of vk::BufferPointer::Get(). (#7313) [spirv] Handles rvalue as implicit object argument of vk::BufferPointer::Get(). Fixes #7302. --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 6 ++++ .../vk.buffer-pointer.rvalue.hlsl | 35 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index eed4f6369f..cd5f860555 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -10932,6 +10932,12 @@ SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( SpirvInstruction *bufferPointer = doExpr(obj); if (!bufferPointer) return nullptr; + if (bufferPointer->isRValue()) { + bufferPointer->setRValue(false); + bufferPointer->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); + return bufferPointer; + } + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); lowerTypeVisitor.visitInstruction(bufferPointer); diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl new file mode 100644 index 0000000000..930770cc16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl @@ -0,0 +1,35 @@ +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s + +// Issue #7302: implicit object argument of Get() evaluates to rvalue + +template +[[vk::ext_instruction(/*spv::OpBitcast*/124)]] +T bitcast(U); + +struct Content +{ + int a; +}; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[IO:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[UDEADBEEF:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 3735928559 +// CHECK-DAG: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[V2UINT:%[_0-9A-Za-z]*]] = OpTypeVector [[UINT]] 2 +// CHECK: [[VECTOR:%[_0-9A-Za-z]*]] = OpConstantComposite [[V2UINT]] [[UDEADBEEF]] [[U0]] +// CHECK: [[CONTENT:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] +// CHECK: [[PPCONTENT:%[_0-9A-Za-z]*]] = 
OpTypePointer PhysicalStorageBuffer [[CONTENT]] +// CHECK: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] + +[numthreads(1, 1, 1)] +void main() +{ + bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get().a = 1; +} + +// CHECK: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// CHECK: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] +// CHECK: OpStore [[PTR]] [[I1]] Aligned 4 + From 30a757960b6d8ff792a59638ed826606e5675409 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Tue, 15 Apr 2025 09:29:20 -0600 Subject: [PATCH 83/88] [spirv] Fixes vk::BufferPointer constructor expression construction. (#7331) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Constructors are now properly attached to the template class declaration instead of a specialization. Closes #6489 (again). --------- Co-authored-by: Nathan Gauër --- tools/clang/lib/AST/ASTContextHLSL.cpp | 16 +++-- tools/clang/lib/Sema/SemaExprCXX.cpp | 63 +++++++++++++------ .../vk.buffer-pointer.from-uint.hlsl | 46 ++++++++++++++ 3 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 2c3c20546f..0a688c03fa 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1390,19 +1390,27 @@ CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, DeclarationName(&context.Idents.get("Get")), true); CanQualType canQualType = recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); - CreateConstructorDeclarationWithParams( + auto *copyConstructorDecl = CreateConstructorDeclarationWithParams( context, recordDecl, context.VoidTy, {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, - context.DeclarationNames.getCXXConstructorName(canQualType), false); - CreateConstructorDeclarationWithParams( + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + auto *addressConstructorDecl = CreateConstructorDeclarationWithParams( context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, - context.DeclarationNames.getCXXConstructorName(canQualType), false); + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, copyConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, addressConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); methodDecl->addAttr( HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + copyConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + addressConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); return Builder.completeDefinition(); } diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index 4723bc93e9..5113c56205 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -1057,26 +1057,51 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, Expr *Arg = Exprs[0]; #ifdef ENABLE_SPIRV_CODEGEN if 
(hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { - for (auto *ctor : Ty->getAsCXXRecordDecl()->ctors()) { - if (auto *functionType = ctor->getType()->getAs()) { - if (functionType->getNumParams() != 1 || - !functionType->getParamType(0)->isIntegerType()) - continue; - - CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); - if (!Arg->isRValue()) { - Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); - } - if (argType != Context.UnsignedLongLongTy) { - Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, - CK_IntegralCast) - .get(); - } - return CXXConstructExpr::Create( - Context, Ty, TyBeginLoc, ctor, false, {Arg}, false, false, false, - false, CXXConstructExpr::ConstructionKind::CK_Complete, - SourceRange(LParenLoc, RParenLoc)); + typedef DeclContext::specific_decl_iterator ft_iter; + auto *recordDecl = Ty->getAsCXXRecordDecl(); + auto *specDecl = cast(recordDecl); + auto *templatedDecl = + specDecl->getSpecializedTemplate()->getTemplatedDecl(); + auto functionTemplateDecls = + llvm::iterator_range(ft_iter(templatedDecl->decls_begin()), + ft_iter(templatedDecl->decls_end())); + for (auto *ftd : functionTemplateDecls) { + auto *fd = ftd->getTemplatedDecl(); + if (fd->getNumParams() != 1 || + !fd->getParamDecl(0)->getType()->isIntegerType()) + continue; + + void *insertPos; + auto templateArgs = ftd->getInjectedTemplateArgs(); + auto *functionDecl = ftd->findSpecialization(templateArgs, insertPos); + if (!functionDecl) { + DeclarationNameInfo DInfo(ftd->getDeclName(), + recordDecl->getLocation()); + auto *templateArgList = TemplateArgumentList::CreateCopy( + Context, templateArgs.data(), templateArgs.size()); + functionDecl = CXXConstructorDecl::Create( + Context, recordDecl, Arg->getLocStart(), DInfo, Ty, TInfo, false, + false, false, false); + functionDecl->setFunctionTemplateSpecialization(ftd, templateArgList, + insertPos); + } else if (functionDecl->getDeclKind() != Decl::Kind::CXXConstructor) { + continue; + } + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, cast(functionDecl), + false, {Arg}, false, false, false, false, + CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); } } #endif diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl new file mode 100644 index 0000000000..b44e1eca09 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -spirv -Od -T cs_6_7 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DALIGN_16 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DNO_PC %s | FileCheck %s + +// Was getting bogus type errors with the defined changes + +#ifdef ALIGN_16 +typedef vk::BufferPointer BufferType; +#else +typedef vk::BufferPointer BufferType; +#endif +#ifndef NO_PC +struct PushConstantStruct { + BufferType push_buffer; +}; +[[vk::push_constant]] PushConstantStruct push_constant; +#endif + +RWStructuredBuffer output; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant 
[[UINT]] 0 +// CHECK: [[PPUINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[PFPPUINT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[PPUINT]] +// CHECK: [[PUUINT:%[_0-9A-Za-z]*]] = OpTypePointer Uniform [[UINT]] +// CHECK: [[OUTPUT:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} Uniform + +[numthreads(1, 1, 1)] +void main() { + uint64_t addr = 123; + vk::BufferPointer test = vk::BufferPointer(addr); + output[0] = test.Get(); +} + +// CHECK: [[TEST:%[_0-9A-Za-z]*]] = OpVariable [[PFPPUINT]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PPUINT]] +// CHECK: OpStore [[TEST]] [[X1]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] Aligned 32 +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 4 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PUUINT]] [[OUTPUT]] [[I0]] [[U0]] +// CHECK: OpStore [[X4]] [[X3]] +// CHECK: OpReturn +// CHECK: OpFunctionEnd + From ea3d8466d807fccbee1a3dc16d4b15bafd12d4fe Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 15 Apr 2025 22:30:11 +0200 Subject: [PATCH 84/88] [SER] Declare all SER HLSL intrinsics (#7347) Simplify merging the SER lowering PRs by declaring all missing SER HLSL intrinsics up front. This reserves stable HLSL opcodes similar to what was done for the DXIL opcodes before. Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md DXC SER implementation tracker: #7214 --- include/dxc/HlslIntrinsicOp.h | 29 +++++- include/dxc/dxcapi.internal.h | 7 +- lib/HLSL/HLOperationLower.cpp | 128 ++++++++++++++++++++++++++ tools/clang/lib/Sema/SemaHLSL.cpp | 10 +- utils/hct/gen_intrin_main.txt | 28 ++++++ utils/hct/hctdb.py | 1 + utils/hct/hlsl_intrinsic_opcodes.json | 31 ++++++- 7 files changed, 225 insertions(+), 9 deletions(-) diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 68b88822e8..d37c27a38e 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -336,7 +336,34 @@ enum class IntrinsicOp { MOP_TraceRayInline = 325, MOP_WorldRayDirection = 326, MOP_WorldRayOrigin = 327, + MOP_DxHitObject_FromRayQuery = 363, + MOP_DxHitObject_GetAttributes = 364, + MOP_DxHitObject_GetGeometryIndex = 365, + MOP_DxHitObject_GetHitKind = 366, + MOP_DxHitObject_GetInstanceID = 367, + MOP_DxHitObject_GetInstanceIndex = 368, + MOP_DxHitObject_GetObjectRayDirection = 369, + MOP_DxHitObject_GetObjectRayOrigin = 370, + MOP_DxHitObject_GetObjectToWorld3x4 = 371, + MOP_DxHitObject_GetObjectToWorld4x3 = 372, + MOP_DxHitObject_GetPrimitiveIndex = 373, + MOP_DxHitObject_GetRayFlags = 374, + MOP_DxHitObject_GetRayTCurrent = 375, + MOP_DxHitObject_GetRayTMin = 376, + MOP_DxHitObject_GetShaderTableIndex = 377, + MOP_DxHitObject_GetWorldRayDirection = 378, + MOP_DxHitObject_GetWorldRayOrigin = 379, + MOP_DxHitObject_GetWorldToObject3x4 = 380, + MOP_DxHitObject_GetWorldToObject4x3 = 381, + MOP_DxHitObject_Invoke = 382, + MOP_DxHitObject_IsHit = 383, + MOP_DxHitObject_IsMiss = 384, + MOP_DxHitObject_IsNop = 385, + MOP_DxHitObject_LoadLocalRootTableConstant = 386, + MOP_DxHitObject_MakeMiss = 387, MOP_DxHitObject_MakeNop = 358, + MOP_DxHitObject_SetShaderTableIndex = 388, + MOP_DxHitObject_TraceRay = 389, IOP_DxMaybeReorderThread = 359, MOP_Count = 328, MOP_FinishedCrossGroupSharing = 329, @@ -369,7 +396,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 363, + Num_Intrinsics = 390, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp 
opcode) { switch (opcode) { diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index d37054194b..28bd3e7066 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -131,12 +131,13 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS = 50, LICOMPTYPE_HIT_OBJECT = 51, + LICOMPTYPE_RAY_QUERY = 52, #ifdef ENABLE_SPIRV_CODEGEN - LICOMPTYPE_VK_BUFFER_POINTER = 52, - LICOMPTYPE_COUNT = 53 + LICOMPTYPE_VK_BUFFER_POINTER = 53, + LICOMPTYPE_COUNT = 54 #else - LICOMPTYPE_COUNT = 52 + LICOMPTYPE_COUNT = 53 #endif }; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index c0f9d7fddd..b5114fa34b 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6197,6 +6197,77 @@ Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, bool &Translated) { return nullptr; // TODO: Merge SER DXIL patches } + +Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return nullptr; // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectLoadLocalRootTableConstant( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectSetShaderTableIndex( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + } // namespace // Resource Handle. 
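Because each of these lowerings is still a stub, what this patch actually lands is the HLSL-level surface declared in gen_intrin_main.txt further below. A minimal sketch of that surface, assuming the dx::HitObject spelling used by the linked SER proposal; the payload struct, acceleration structure binding, and ray values are illustrative, and these calls will not produce final DXIL until the SER lowering patches are merged:

    struct MyPayload { float4 color; };
    RaytracingAccelerationStructure Scene : register(t0);

    [shader("raygeneration")]
    void Main() {
      RayDesc Ray = { float3(0, 0, 0), 0.0f, float3(0, 0, 1), 1000.0f };
      MyPayload Payload = (MyPayload)0;

      // Trace without invoking closesthit/miss, inspect the hit, then invoke.
      dx::HitObject Hit = dx::HitObject::TraceRay(
          Scene, RAY_FLAG_NONE, 0xFF, 0, 1, 0, Ray, Payload);
      if (!Hit.IsMiss())
        Hit.SetShaderTableIndex(0); // illustrative record index
      dx::HitObject::Invoke(Hit, Payload);
    }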
@@ -6908,6 +6979,63 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery, + DXIL::OpCode::HitObject_FromRayQuery}, + {IntrinsicOp::MOP_DxHitObject_GetAttributes, + TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes}, + {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex}, + {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_HitKind}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_InstanceID}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection, + TranslateHitObjectVectorGetter, + DXIL::OpCode::HitObject_ObjectRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex}, + {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayFlags}, + {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent}, + {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayTMin}, + {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke, + DXIL::OpCode::HitObject_Invoke}, + {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsHit}, + {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsMiss}, + {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsNop}, + {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, + TranslateHitObjectLoadLocalRootTableConstant, + DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, + {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMake, + DXIL::OpCode::HitObject_MakeMiss}, + {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, + TranslateHitObjectSetShaderTableIndex, + DXIL::OpCode::HitObject_SetShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, + DXIL::OpCode::HitObject_TraceRay}, }; } // namespace static_assert( diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp 
b/tools/clang/lib/Sema/SemaHLSL.cpp index 5236a1e3c4..230c7e65d9 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -580,9 +580,9 @@ const UINT g_uBasicKindProps[] = { 0, // AR_OBJECT_PROCEDURAL_PRIMITIVE_HIT_GROUP, 0, // AR_OBJECT_RAYTRACING_PIPELINE_CONFIG1, - BPROP_OBJECT, // AR_OBJECT_RAY_QUERY, - BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, - BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, + LICOMPTYPE_RAY_QUERY, // AR_OBJECT_RAY_QUERY, + BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, + BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, BPROP_OBJECT | BPROP_RWBUFFER | BPROP_TEXTURE, // AR_OBJECT_RWTEXTURE2DMS BPROP_OBJECT | BPROP_RWBUFFER | @@ -1135,6 +1135,9 @@ static const ArBasicKind g_ResourceCT[] = {AR_OBJECT_HEAP_RESOURCE, static const ArBasicKind g_RayDescCT[] = {AR_OBJECT_RAY_DESC, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_RayQueryCT[] = {AR_OBJECT_RAY_QUERY, + AR_BASIC_UNKNOWN}; + static const ArBasicKind g_AccelerationStructCT[] = { AR_OBJECT_ACCELERATION_STRUCT, AR_BASIC_UNKNOWN}; @@ -1297,6 +1300,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT + g_RayQueryCT, // LICOMPTYPE_RAY_QUERY #ifdef ENABLE_SPIRV_CODEGEN g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER #endif diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 55c3643d95..f1274fd308 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1101,6 +1101,34 @@ uint [[ro]] CommittedInstanceContributionToHitGroupIndex(); // Shader Execution Reordering namespace DxHitObjectMethods { DxHitObject [[static,class_prefix,min_sm=6.9]] MakeNop(); + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeMiss(in uint RayFlags, in uint MissShaderIndex, in ray_desc Ray); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq, in uint HitKind, in udt Attributes); + DxHitObject [[static,class_prefix,min_sm=6.9]] TraceRay(in acceleration_struct AccelerationStructure, in uint RayFlags, in uint InstanceInclusionMask, in uint RayContributionToHitGroupIndex, in uint MultiplierForGeometryContributionToHitGroupIndex, in uint MissShaderIndex, in ray_desc Ray, inout udt Payload); + void [[static,class_prefix,min_sm=6.9]] Invoke(in DxHitObject ho, inout udt Payload); + bool [[rn,class_prefix,min_sm=6.9]] IsMiss(); + bool [[rn,class_prefix,min_sm=6.9]] IsHit(); + bool [[rn,class_prefix,min_sm=6.9]] IsNop(); + uint [[rn,class_prefix,min_sm=6.9]] GetRayFlags(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTMin(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTCurrent(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayDirection(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayDirection(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld4x3(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject4x3(); + uint [[rn,class_prefix,min_sm=6.9]] GetGeometryIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceID(); + uint [[rn,class_prefix,min_sm=6.9]] 
GetPrimitiveIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetHitKind(); + uint [[rn,class_prefix,min_sm=6.9]] GetShaderTableIndex(); + $funcT [[class_prefix,min_sm=6.9]] GetAttributes(); + void [[class_prefix,min_sm=6.9]] SetShaderTableIndex(in uint RecordIndex); + uint [[ro,class_prefix,min_sm=6.9]] LoadLocalRootTableConstant(in uint RootConstantOffsetInBytes); } namespace namespace DxIntrinsics { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 9b2f33727a..6344fb5849 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -9183,6 +9183,7 @@ def __init__(self, intrinsic_defs, opcode_data): "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", "DxHitObject": "LICOMPTYPE_HIT_OBJECT", "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", + "RayQuery": "LICOMPTYPE_RAY_QUERY", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index c4527277cd..d99b84b745 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 363, + "Num_Intrinsics": 390, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -363,6 +363,33 @@ "IOP_DxMaybeReorderThread": 359, "IOP_Vkreinterpret_pointer_cast": 360, "IOP_Vkstatic_pointer_cast": 361, - "MOP_GetBufferContents": 362 + "MOP_GetBufferContents": 362, + "MOP_DxHitObject_FromRayQuery": 363, + "MOP_DxHitObject_GetAttributes": 364, + "MOP_DxHitObject_GetGeometryIndex": 365, + "MOP_DxHitObject_GetHitKind": 366, + "MOP_DxHitObject_GetInstanceID": 367, + "MOP_DxHitObject_GetInstanceIndex": 368, + "MOP_DxHitObject_GetObjectRayDirection": 369, + "MOP_DxHitObject_GetObjectRayOrigin": 370, + "MOP_DxHitObject_GetObjectToWorld3x4": 371, + "MOP_DxHitObject_GetObjectToWorld4x3": 372, + "MOP_DxHitObject_GetPrimitiveIndex": 373, + "MOP_DxHitObject_GetRayFlags": 374, + "MOP_DxHitObject_GetRayTCurrent": 375, + "MOP_DxHitObject_GetRayTMin": 376, + "MOP_DxHitObject_GetShaderTableIndex": 377, + "MOP_DxHitObject_GetWorldRayDirection": 378, + "MOP_DxHitObject_GetWorldRayOrigin": 379, + "MOP_DxHitObject_GetWorldToObject3x4": 380, + "MOP_DxHitObject_GetWorldToObject4x3": 381, + "MOP_DxHitObject_Invoke": 382, + "MOP_DxHitObject_IsHit": 383, + "MOP_DxHitObject_IsMiss": 384, + "MOP_DxHitObject_IsNop": 385, + "MOP_DxHitObject_LoadLocalRootTableConstant": 386, + "MOP_DxHitObject_MakeMiss": 387, + "MOP_DxHitObject_SetShaderTableIndex": 388, + "MOP_DxHitObject_TraceRay": 389 } } From 5f18e2bac0833412ca07637a98d445a84f7d30e2 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 15 Apr 2025 15:43:10 -0700 Subject: [PATCH 85/88] Add HctGen of DXIL.rst back to build without LLVM_BUILD_DOCS required (#7346) HctGen of DXIL.rst should happen on every ordinary build, and be updated with other HctGen modified files. This isn't about building the doc, it's about updating it when definitions change in hctdb.py. We've been missing updates to DXIL.rst for quite a while due to this issue, introduced [here](https://github.com/microsoft/DirectXShaderCompiler/pull/6715/files#diff-1e7de1ae2d059d21e1dd75d5812d5a34b0222cef273b7c3a2af62eb747f9d20aR768-R770). This also brings DXIL.rst up to date. 
--- CMakeLists.txt | 4 +- docs/DXIL.rst | 698 +++++++++++++++++++++++++++---------------------- 2 files changed, 380 insertions(+), 322 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0977fa1246..5210718005 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -762,9 +762,7 @@ if (LLVM_INCLUDE_DOCS) add_subdirectory(docs) endif() -if (LLVM_BUILD_DOCS) - add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change -endif() +add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change add_subdirectory(cmake/modules) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index a68e31d0a9..a1c5055085 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -1984,54 +1984,57 @@ The following LLVM instructions are valid in a DXIL program, with the specified .. hctdb_instrhelp.get_instrs_rst() .. INSTR-RST:BEGIN -============= ======================================================================= ================= -Instruction Action Operand overloads -============= ======================================================================= ================= -Ret returns a value (possibly void), from a function. vhfd1wil -Br branches (conditional or unconditional) -Switch performs a multiway switch -Add returns the sum of its two operands wil -FAdd returns the sum of its two operands hfd -Sub returns the difference of its two operands wil -FSub returns the difference of its two operands hfd -Mul returns the product of its two operands wil -FMul returns the product of its two operands hfd -UDiv returns the quotient of its two unsigned operands wil -SDiv returns the quotient of its two signed operands wil -FDiv returns the quotient of its two operands hfd -URem returns the remainder from the unsigned division of its two operands wil -SRem returns the remainder from the signed division of its two operands wil -FRem returns the remainder from the division of its two operands hfd -Shl shifts left (logical) wil -LShr shifts right (logical), with zero bit fill wil -AShr shifts right (arithmetic), with 'a' operand sign bit fill wil -And returns a bitwise logical and of its two operands 1wil -Or returns a bitwise logical or of its two operands 1wil -Xor returns a bitwise logical xor of its two operands 1wil -Alloca allocates memory on the stack frame of the currently executing function -Load reads from memory -Store writes to memory -GetElementPtr gets the address of a subelement of an aggregate value -AtomicCmpXchg atomically modifies memory -AtomicRMW atomically modifies memory -Trunc truncates an integer 1wil -ZExt zero extends an integer 1wil -SExt sign extends an integer 1wil -FPToUI converts a floating point to UInt hfd1wil -FPToSI converts a floating point to SInt hfd1wil -UIToFP converts a UInt to floating point hfd1wil -SIToFP converts a SInt to floating point hfd1wil -FPTrunc truncates a floating point hfd -FPExt extends a floating point hfd -BitCast performs a bit-preserving type cast hfd1wil -AddrSpaceCast casts a value addrspace -ICmp compares integers 1wil -FCmp compares floating points hfd -PHI is a PHI node instruction -Call calls a function -Select selects an instruction -ExtractValue extracts from aggregate -============= ======================================================================= ================= +============== ======================================================================= ================= +Instruction Action Operand overloads +============== ======================================================================= ================= +Ret returns a value 
(possibly void), from a function. vhfd1wil +Br branches (conditional or unconditional) +Switch performs a multiway switch +Add returns the sum of its two operands wil +FAdd returns the sum of its two operands hfd +Sub returns the difference of its two operands wil +FSub returns the difference of its two operands hfd +Mul returns the product of its two operands wil +FMul returns the product of its two operands hfd +UDiv returns the quotient of its two unsigned operands wil +SDiv returns the quotient of its two signed operands wil +FDiv returns the quotient of its two operands hfd +URem returns the remainder from the unsigned division of its two operands wil +SRem returns the remainder from the signed division of its two operands wil +FRem returns the remainder from the division of its two operands hfd +Shl shifts left (logical) wil +LShr shifts right (logical), with zero bit fill wil +AShr shifts right (arithmetic), with 'a' operand sign bit fill wil +And returns a bitwise logical and of its two operands 1wil +Or returns a bitwise logical or of its two operands 1wil +Xor returns a bitwise logical xor of its two operands 1wil +Alloca allocates memory on the stack frame of the currently executing function +Load reads from memory +Store writes to memory +GetElementPtr gets the address of a subelement of an aggregate value +AtomicCmpXchg atomically modifies memory +AtomicRMW atomically modifies memory +Trunc truncates an integer 1wil +ZExt zero extends an integer 1wil +SExt sign extends an integer 1wil +FPToUI converts a floating point to UInt hfd1wil +FPToSI converts a floating point to SInt hfd1wil +UIToFP converts a UInt to floating point hfd1wil +SIToFP converts a SInt to floating point hfd1wil +FPTrunc truncates a floating point hfd +FPExt extends a floating point hfd +BitCast performs a bit-preserving type cast hfd1wil +AddrSpaceCast casts a value addrspace +ICmp compares integers 1wil +FCmp compares floating points hfd +PHI is a PHI node instruction +Call calls a function +Select selects an instruction +ExtractElement extracts from vector +InsertElement inserts into vector +ShuffleVector Shuffle two vectors +ExtractValue extracts from aggregate +============== ======================================================================= ================= FAdd @@ -2369,6 +2372,53 @@ ID Name Description 255 SampleCmpBias samples a texture after applying the input bias to the mipmap level and compares a single component against the specified comparison value 256 StartVertexLocation returns the BaseVertexLocation from DrawIndexedInstanced or StartVertexLocation from DrawInstanced 257 StartInstanceLocation returns the StartInstanceLocation from Draw*Instanced +258 AllocateRayQuery2 allocates space for RayQuery and return handle +259 ReservedA0 reserved +260 ReservedA1 reserved +261 ReservedA2 reserved +262 HitObject_TraceRay Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject +263 HitObject_FromRayQuery Creates a new HitObject representing a committed hit from a RayQuery +264 HitObject_FromRayQueryWithAttrs Creates a new HitObject representing a committed hit from a RayQuery and committed attributes +265 HitObject_MakeMiss Creates a new HitObject representing a miss +266 HitObject_MakeNop Creates an empty nop HitObject +267 HitObject_Invoke Represents the invocation of the CH/MS shader represented by the HitObject +268 MaybeReorderThread Reorders the current thread +269 HitObject_IsMiss Returns `true` if the HitObject represents a miss +270 
HitObject_IsHit Returns `true` if the HitObject is a NOP-HitObject +271 HitObject_IsNop Returns `true` if the HitObject represents a nop +272 HitObject_RayFlags Returns the ray flags set in the HitObject +273 HitObject_RayTMin Returns the TMin value set in the HitObject +274 HitObject_RayTCurrent Returns the current T value set in the HitObject +275 HitObject_WorldRayOrigin Returns the ray origin in world space +276 HitObject_WorldRayDirection Returns the ray direction in world space +277 HitObject_ObjectRayOrigin Returns the ray origin in object space +278 HitObject_ObjectRayDirection Returns the ray direction in object space +279 HitObject_ObjectToWorld3x4 Returns the object to world space transformation matrix in 3x4 form +280 HitObject_WorldToObject3x4 Returns the world to object space transformation matrix in 3x4 form +281 HitObject_GeometryIndex Returns the geometry index committed on hit +282 HitObject_InstanceIndex Returns the instance index committed on hit +283 HitObject_InstanceID Returns the instance id committed on hit +284 HitObject_PrimitiveIndex Returns the primitive index committed on hit +285 HitObject_HitKind Returns the HitKind of the hit +286 HitObject_ShaderTableIndex Returns the shader table index set for this HitObject +287 HitObject_SetShaderTableIndex Returns a HitObject with updated shader table index +288 HitObject_LoadLocalRootTableConstant Returns the root table constant for this HitObject and offset +289 HitObject_Attributes Returns the attributes set for this HitObject +290 ReservedB28 reserved +291 ReservedB29 reserved +292 ReservedB30 reserved +293 ReservedC0 reserved +294 ReservedC1 reserved +295 ReservedC2 reserved +296 ReservedC3 reserved +297 ReservedC4 reserved +298 ReservedC5 reserved +299 ReservedC6 reserved +300 ReservedC7 reserved +301 ReservedC8 reserved +302 ReservedC9 reserved +303 RawBufferVectorLoad reads from a raw buffer and structured buffer +304 RawBufferVectorStore writes to a RWByteAddressBuffer or RWStructuredBuffer === ===================================================== ======================================================================================================================================================================================================================= @@ -3015,277 +3065,287 @@ The set of validation rules that are known to hold for a DXIL program is identif .. hctdb_instrhelp.get_valrules_rst() .. 
VALRULES-RST:BEGIN -========================================= ======================================================================================================================================================================================================================================================================================================== -Rule Code Description -========================================= ======================================================================================================================================================================================================================================================================================================== -BITCODE.VALID Module must be bitcode-valid -CONTAINER.PARTINVALID DXIL Container must not contain unknown parts -CONTAINER.PARTMATCHES DXIL Container Parts must match Module -CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module -CONTAINER.PARTREPEATED DXIL Container must have only one of each part type -CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader -DECL.ATTRSTRUCT Attributes parameter must be struct type -DECL.DXILFNEXTERN External function must be a DXIL function -DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types -DECL.EXTRAARGS Extra arguments not allowed for shader functions -DECL.FNATTRIBUTE Functions should only contain known function attributes -DECL.FNFLATTENPARAM Function parameters must not use struct types -DECL.FNISCALLED Functions can only be used by call instructions -DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record -DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type -DECL.NOTUSEDEXTERNAL External declaration should not be used -DECL.PARAMSTRUCT Callable function parameter must be struct type -DECL.PAYLOADSTRUCT Payload parameter must be struct type -DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures -DECL.RESOURCEINFNSIG Resources not allowed in function signatures -DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types -DECL.SHADERRETURNVOID Shader functions must return void -DECL.USEDEXTERNALFUNCTION External function must be used -DECL.USEDINTERNAL Internal declaration must be used -FLOW.DEADLOOP Loop must have break. -FLOW.FUNCTIONCALL Function with parameter is not permitted -FLOW.NORECURSION Recursion is not permitted. -FLOW.REDUCIBLE Execution flow must be reducible. -INSTR.ALLOWED Instructions must be of an allowed type. -INSTR.ATOMICCONST Constant destination to atomic. -INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. -INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. -INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. -INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' -INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). -INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. -INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. 
-INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant -INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. -INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. -INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. -INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature -INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed -INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. -INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. -INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. -INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer don't need 2 coordinates. -INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer require 2 coordinates. -INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. -INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. -INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. -INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. -INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. -INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. -INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. -INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. -INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. -INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. -INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. -INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. -INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. -INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. -INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. -INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. -INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. -INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. -INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. -INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. -INSTR.NOIDIVBYZERO No signed integer division by zero. -INSTR.NOINDEFINITEACOS No indefinite arccosine. -INSTR.NOINDEFINITEASIN No indefinite arcsine. -INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. -INSTR.NOINDEFINITELOG No indefinite logarithm. -INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. -INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. -INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. -INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. -INSTR.NOUDIVBYZERO No unsigned integer division by zero. -INSTR.OFFSETONUAVLOAD uav load don't support offset. -INSTR.OLOAD DXIL intrinsic overload must be valid. -INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. 
-INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. -INSTR.OPCONST DXIL intrinsic requires an immediate constant operand -INSTR.OPCONSTRANGE Constant values must be in-range for operation. -INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range -INSTR.PTRBITCAST Pointer type bitcast must be have same size. -INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. -INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. -INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. -INSTR.RESOURCECOORDINATEMISS coord uninitialized. -INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. -INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. -INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. -INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. -INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. -INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. -INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. -INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. -INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. -INSTR.RESOURCEOFFSETMISS offset uninitialized. -INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. -INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. -INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. -INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. -INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. -INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. -INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. -INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. -INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. -INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. -INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. -INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. -INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. -INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. -INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. -INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. -META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. -META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. 
-META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. -META.BRANCHFLATTEN Can't use branch and flatten attributes together. -META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components -META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. -META.COMPUTEWITHNODE Compute entry must not have node metadata -META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. -META.DENSERESIDS Resource identifiers must be zero-based and dense. -META.DUPLICATESYSVALUE System value may only appear once in signature -META.ENTRYFUNCTION entrypoint not found. -META.FLAGSUSAGE Flags must match usage. -META.FORCECASEONSWITCH Attribute forcecase only works for switch. -META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. -META.INTEGERINTERPMODE Interpolation mode on integer must be Constant -META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. -META.INTERPMODEVALID Interpolation mode must be valid -META.INVALIDCONTROLFLOWHINT Invalid control flow hint. -META.KNOWN Named metadata should be known -META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. -META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. -META.NOSEMANTICOVERLAP Semantics must not overlap -META.REQUIRED Required metadata missing. -META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. -META.SEMAKINDVALID Semantic kind must be valid -META.SEMANTICCOMPTYPE %0 must be %1. -META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index -META.SEMANTICLEN Semantic length must be at least 1 and at most 64. -META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location -META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 -META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. -META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. -META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value -META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together -META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size -META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. -META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned -META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds -META.SYSTEMVALUEROWS System value may only have 1 row -META.TARGET Target triple must be 'dxil-ms-dx' -META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. -META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. -META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. -META.USED All metadata must be used by dxil. -META.VALIDSAMPLERMODE Invalid sampler mode on sampler . -META.VALUERANGE Metadata value must be within range. -META.VERSIONSUPPORTED Version in metadata must be supported. -META.WELLFORMED Metadata must be well-formed in operand count and types. 
-SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. -SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. -SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes -SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow -SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap -SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes -SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. -SM.COMPLETEPOSITION Not all elements of SV_Position were written. -SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. -SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. -SM.CSNOSIGNATURES Compute shaders must not have shader signatures. -SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. -SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. -SM.DXILVERSION Target shader model requires specific Dxil Version -SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. -SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. -SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. -SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. -SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. -SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. -SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count -SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry -SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above -SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode -SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties -SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group -SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model -SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage -SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible -SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. -SM.INVALIDRESOURCEKIND Invalid resources kind. -SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. -SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. -SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. 
Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. -SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. -SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. -SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. -SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. -SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. -SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. -SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. -SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. -SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists -SM.NAME Target shader model name must be known -SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. -SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. -SM.OPCODE Opcode must be defined in target shader model -SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function -SM.OPERAND Operand must be defined in target shader model. -SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. -SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . -SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. -SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). -SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. -SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. -SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. -SM.PSTARGETCOL0 SV_Target packed location must start at column 0. -SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. -SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. -SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. -SM.RESOURCERANGEOVERLAP Resource ranges must not overlap -SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. -SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. -SM.SEMANTIC Semantic must be defined in target shader model -SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. -SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. 
-SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. -SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. -SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. -SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. -SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. -SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature -SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 -SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. -SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max -SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min -SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. -SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. -SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders -SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range -SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. -SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. -SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. -SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] -SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. -TYPES.DEFINED Type must be defined based on DXIL primitives -TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. -TYPES.INTWIDTH Int type must be of valid width -TYPES.NOMULTIDIM Only one dimension allowed for array type. -TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. 
-TYPES.NOVECTOR Vector types must not be present -========================================= ======================================================================================================================================================================================================================================================================================================== +===================================================== ======================================================================================================================================================================================================================================================================================================== +Rule Code Description +===================================================== ======================================================================================================================================================================================================================================================================================================== +BITCODE.VALID Module must be bitcode-valid +CONTAINER.CONTENTINVALID DXIL Container Content is well-formed +CONTAINER.CONTENTMATCHES DXIL Container Content must match Module +CONTAINER.PARTINVALID DXIL Container must not contain unknown parts +CONTAINER.PARTMATCHES DXIL Container Parts must match Module +CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module +CONTAINER.PARTREPEATED DXIL Container must have only one of each part type +CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader +CONTAINER.UNUSEDITEMINTABLE Items in Table must be used +DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant +DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant +DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument +DECL.ATTRSTRUCT Attributes parameter must be struct type +DECL.DXILFNEXTERN External function must be a DXIL function +DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types +DECL.EXTRAARGS Extra arguments not allowed for shader functions +DECL.FNATTRIBUTE Functions should only contain known function attributes +DECL.FNFLATTENPARAM Function parameters must not use struct types +DECL.FNISCALLED Functions can only be used by call instructions +DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record +DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type +DECL.NOTUSEDEXTERNAL External declaration should not be used +DECL.PARAMSTRUCT Callable function parameter must be struct type +DECL.PAYLOADSTRUCT Payload parameter must be struct type +DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures +DECL.RESOURCEINFNSIG Resources not allowed in function signatures +DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types +DECL.SHADERRETURNVOID Shader functions must return void +DECL.USEDEXTERNALFUNCTION External function must be used +DECL.USEDINTERNAL Internal declaration must be used +FLOW.DEADLOOP Loop must have break. +FLOW.FUNCTIONCALL Function with parameter is not permitted +FLOW.NORECURSION Recursion is not permitted. 
+FLOW.REDUCIBLE Execution flow must be reducible. +INSTR.ALLOWED Instructions must be of an allowed type. +INSTR.ATOMICCONST Constant destination to atomic. +INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. +INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. +INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. +INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' +INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). +INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. +INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. +INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant +INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. +INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. +INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. +INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature +INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed +INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. +INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. +INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. +INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. +INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. +INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. +INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. +INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. +INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. +INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. +INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. +INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. +INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. +INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. +INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. +INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. +INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. +INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. +INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. +INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. +INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. +INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. +INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. 
+INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. +INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. +INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. +INSTR.NOIDIVBYZERO No signed integer division by zero. +INSTR.NOINDEFINITEACOS No indefinite arccosine. +INSTR.NOINDEFINITEASIN No indefinite arcsine. +INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. +INSTR.NOINDEFINITELOG No indefinite logarithm. +INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. +INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. +INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. +INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. +INSTR.NOUDIVBYZERO No unsigned integer division by zero. +INSTR.OFFSETONUAVLOAD uav load don't support offset. +INSTR.OLOAD DXIL intrinsic overload must be valid. +INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. +INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. +INSTR.OPCONST DXIL intrinsic requires an immediate constant operand +INSTR.OPCONSTRANGE Constant values must be in-range for operation. +INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range +INSTR.PTRBITCAST Pointer type bitcast must be have same size. +INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. +INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. +INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. +INSTR.RESOURCECOORDINATEMISS coord uninitialized. +INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. +INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. +INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. +INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. +INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. +INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. +INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. +INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. +INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. +INSTR.RESOURCEOFFSETMISS offset uninitialized. +INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. +INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. +INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. +INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. +INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. 
+INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. +INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. +INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. +INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. +INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. +INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. +INSTR.UNDEFHITOBJECT HitObject is undef. +INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. +INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. +INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. +INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. +INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. +META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. +META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. +META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. +META.BRANCHFLATTEN Can't use branch and flatten attributes together. +META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components +META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. +META.COMPUTEWITHNODE Compute entry must not have node metadata +META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. +META.DENSERESIDS Resource identifiers must be zero-based and dense. +META.DUPLICATESYSVALUE System value may only appear once in signature +META.ENTRYFUNCTION entrypoint not found. +META.FLAGSUSAGE Flags must match usage. +META.FORCECASEONSWITCH Attribute forcecase only works for switch. +META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. +META.INTEGERINTERPMODE Interpolation mode on integer must be Constant +META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. +META.INTERPMODEVALID Interpolation mode must be valid +META.INVALIDCONTROLFLOWHINT Invalid control flow hint. +META.KNOWN Named metadata should be known +META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. +META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. +META.NOSEMANTICOVERLAP Semantics must not overlap +META.REQUIRED Required metadata missing. +META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. +META.SEMAKINDVALID Semantic kind must be valid +META.SEMANTICCOMPTYPE %0 must be %1. +META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index +META.SEMANTICLEN Semantic length must be at least 1 and at most 64. +META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location +META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 +META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. +META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. 
+META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value +META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together +META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size +META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. +META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned +META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds +META.SYSTEMVALUEROWS System value may only have 1 row +META.TARGET Target triple must be 'dxil-ms-dx' +META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. +META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. +META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. +META.USED All metadata must be used by dxil. +META.VALIDSAMPLERMODE Invalid sampler mode on sampler . +META.VALUERANGE Metadata value must be within range. +META.VERSIONSUPPORTED Version in metadata must be supported. +META.WELLFORMED Metadata must be well-formed in operand count and types. +SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. +SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. +SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes +SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow +SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap +SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes +SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. +SM.COMPLETEPOSITION Not all elements of SV_Position were written. +SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. +SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. +SM.CSNOSIGNATURES Compute shaders must not have shader signatures. +SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. +SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. +SM.DXILVERSION Target shader model requires specific Dxil Version +SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. +SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. +SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. +SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. +SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. +SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. 
+SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count +SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry +SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above +SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode +SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties +SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group +SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model +SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage +SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible +SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. +SM.INVALIDRESOURCEKIND Invalid resources kind. +SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. +SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. +SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. +SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. +SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. +SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. +SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. +SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. +SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. +SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. +SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. +SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists +SM.NAME Target shader model name must be known +SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. +SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. +SM.OPCODE Opcode must be defined in target shader model +SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function +SM.OPERAND Operand must be defined in target shader model. +SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. +SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . +SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. 
+SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version +SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). +SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. +SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. +SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. +SM.PSTARGETCOL0 SV_Target packed location must start at column 0. +SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. +SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. +SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. +SM.RESOURCERANGEOVERLAP Resource ranges must not overlap +SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. +SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. +SM.SEMANTIC Semantic must be defined in target shader model +SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. +SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. +SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. +SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. +SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. +SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. +SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. +SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature +SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 +SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. +SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max +SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min +SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. +SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. +SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders +SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range +SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. +SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. +SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. +SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] +SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. +TYPES.DEFINED Type must be defined based on DXIL primitives +TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. 
+TYPES.INTWIDTH Int type must be of valid width +TYPES.NOMULTIDIM Only one dimension allowed for array type. +TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. +TYPES.NOVECTOR Vector types must not be present +===================================================== ======================================================================================================================================================================================================================================================================================================== .. VALRULES-RST:END From 10bff1319a28e8ad2aa0c5aa894ba8ec4c3a2e2b Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 16 Apr 2025 05:35:25 +0200 Subject: [PATCH 86/88] Fix field names in long vector DICompositeType (#7332) Fix OOB accesses for debug info vector field names for vectors of length >4. --- tools/clang/lib/CodeGen/CGDebugInfo.cpp | 11 ++++++- .../hlsl/types/longvec-field-di.hlsl | 33 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl diff --git a/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 206f7d9523..d947887d62 100644 --- a/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1047,8 +1047,17 @@ bool CGDebugInfo::TryCollectHLSLRecordElements(const RecordType *Ty, unsigned VecSize = hlsl::GetHLSLVecSize(QualTy); unsigned ElemSizeInBits = CGM.getContext().getTypeSize(ElemQualTy); unsigned CurrentAlignedOffset = 0; + SmallString<8> FieldNameBuf; for (unsigned ElemIdx = 0; ElemIdx < VecSize; ++ElemIdx) { - StringRef FieldName = StringRef(&"xyzw"[ElemIdx], 1); + StringRef FieldName; + if (VecSize <= 4) { + FieldName = StringRef(&"xyzw"[ElemIdx], 1); + } else { + FieldNameBuf.clear(); + llvm::raw_svector_ostream OS(FieldNameBuf); + OS << 'c' << ElemIdx; + FieldName = OS.str(); + } CurrentAlignedOffset = llvm::RoundUpToAlignment(CurrentAlignedOffset, AlignBits); llvm::DIType *FieldType = diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl new file mode 100644 index 0000000000..935ec3cc13 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=8 | FileCheck %s --check-prefix=CHECK-LONG +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=4 | FileCheck %s --check-prefix=CHECK-SHORT + +// Test debug info for short and long vector types + +RWByteAddressBuffer buf; + +export vector lv_global_arr_ret() { + vector d = buf.Load >(0); + return d; +} + +// CHECK-LONG: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 256, align: 32, elements: ![[ELEMDI:[^ ]+]], +// CHECK-LONG: ![[ELEMDI]] = !{![[C0:[^ ]+]], ![[C1:[^ ]+]], ![[C2:[^ ]+]], ![[C3:[^ ]+]], ![[C4:[^ ]+]], ![[C5:[^ ]+]], ![[C6:[^ ]+]], ![[C7:[^ ]+]]} +// CHECK-LONG: ![[C0]] = !DIDerivedType(tag: DW_TAG_member, name: "c0", scope: !{{[^ ]+}} file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +// CHECK-LONG: ![[C1]] = !DIDerivedType(tag: DW_TAG_member, name: "c1", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[C2]] = 
!DIDerivedType(tag: DW_TAG_member, name: "c2", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic)
+// CHECK-LONG: ![[C3]] = !DIDerivedType(tag: DW_TAG_member, name: "c3", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic)
+// CHECK-LONG: ![[C4]] = !DIDerivedType(tag: DW_TAG_member, name: "c4", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 128, flags: DIFlagPublic)
+// CHECK-LONG: ![[C5]] = !DIDerivedType(tag: DW_TAG_member, name: "c5", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 160, flags: DIFlagPublic)
+// CHECK-LONG: ![[C6]] = !DIDerivedType(tag: DW_TAG_member, name: "c6", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 192, flags: DIFlagPublic)
+// CHECK-LONG: ![[C7]] = !DIDerivedType(tag: DW_TAG_member, name: "c7", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 224, flags: DIFlagPublic)
+// CHECK-LONG: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]])
+
+// CHECK-SHORT: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 128, align: 32, elements: ![[ELEMDI:[^ ]+]],
+// CHECK-SHORT: ![[ELEMDI]] = !{![[X:[^ ]+]], ![[Y:[^ ]+]], ![[Z:[^ ]+]], ![[W:[^ ]+]]}
+// CHECK-SHORT: ![[X]] = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic)
+// CHECK-SHORT: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+// CHECK-SHORT: ![[Y]] = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+// CHECK-SHORT: ![[Z]] = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic)
+// CHECK-SHORT: ![[W]] = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic)
+// CHECK-SHORT: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]])
\ No newline at end of file

From 0a470b51535265b759c0c3a3078fa8f97fd5eb12 Mon Sep 17 00:00:00 2001
From: Steve Urquhart <53908460+SteveUrquhart@users.noreply.github.com>
Date: Wed, 16 Apr 2025 11:00:58 -0400
Subject: [PATCH 87/88] [SPIRV] Remove patch decoration from gl_TessCoord (#7187) (#7349)

This PR fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7187.
gl_TessCoord is not a per-patch builtin and therefore the SPIRV should not be decorated with Patch.
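As an illustration (a minimal sketch, not part of this patch; the struct and parameter names are invented), consider a domain shader of the shape exercised by the tests below: the SV_DomainLocation input lowers to the TessCoord builtin and varies per invocation, while the SV_TessFactor and SV_InsideTessFactor inputs are the ones that legitimately keep the Patch decoration.

struct HsConstants {
  float Edges[3] : SV_TessFactor;       // per-patch input, still decorated Patch
  float Inside   : SV_InsideTessFactor; // per-patch input, still decorated Patch
};

struct ControlPoint {
  float3 pos : POSITION;
};

[domain("tri")]
float4 main(HsConstants pc,
            float3 uvw : SV_DomainLocation, // maps to gl_TessCoord, no Patch decoration
            const OutputPatch<ControlPoint, 3> patch) : SV_Position {
  // Barycentric interpolation of the three control points at this domain location.
  float3 p = patch[0].pos * uvw.x + patch[1].pos * uvw.y + patch[2].pos * uvw.z;
  return float4(p, 1.0);
}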
This is clear in the GLSL specification, and a SPIRV spec clarification is online here:
https://gitlab.khronos.org/spirv/SPIR-V/-/issues/819
---
 tools/clang/lib/SPIRV/DeclResultIdMapper.cpp                   | 3 ++-
 tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv           | 1 -
 tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl | 1 -
 tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl          | 1 -
 4 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
index 0358873589..de73d5e417 100644
--- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
+++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
@@ -3522,7 +3522,8 @@ SpirvVariable *DeclResultIdMapper::createSpirvInterfaceVariable(
       // Decorate with PerPrimitiveNV for per-primitive out variables.
       spvBuilder.decoratePerPrimitiveNV(varInstr, varInstr->getSourceLocation());
-    } else {
+    } else if (stageVar.getSemanticInfo().getKind() !=
+               hlsl::Semantic::Kind::DomainLocation) {
       spvBuilder.decoratePatch(varInstr, varInstr->getSourceLocation());
     }
   }
diff --git a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv
index 3b0c060a0d..9d915a84f2 100644
--- a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv
+++ b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv
@@ -96,7 +96,6 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // CHECK-NEXT: OpDecorate %in_var_TANVCORNER Patch
 // CHECK-NEXT: OpDecorate %in_var_TANWEIGHTS Patch
 // CHECK-NEXT: OpDecorate %gl_TessCoord BuiltIn TessCoord
-// CHECK-NEXT: OpDecorate %gl_TessCoord Patch
 // CHECK-NEXT: OpDecorate %gl_Position BuiltIn Position
 // CHECK-NEXT: OpDecorate %in_var_BEZIERPOS Location 0
 // CHECK-NEXT: OpDecorate %in_var_TANGENT Location 1
diff --git a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl
index 5e4049f8c3..391e09a428 100644
--- a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl
@@ -4,7 +4,6 @@
 // CHECK-SAME: %gl_TessCoord
 // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord
-// CHECK: OpDecorate %gl_TessCoord Patch
 // CHECK: %gl_TessCoord = OpVariable %_ptr_Input_v3float Input
diff --git a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl
index a8fe81e021..6f073aeb46 100644
--- a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl
@@ -85,7 +85,6 @@ struct DsOut {
 // CHECK: OpDecorateString %gl_PointSize UserSemantic "PSIZE"
 // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord
 // CHECK: OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation"
-// CHECK: OpDecorate %gl_TessCoord Patch
 // CHECK: OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter
 // CHECK: OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor"
 // CHECK: OpDecorate %gl_TessLevelOuter Patch

From 0beaa767f7712f0ed0ab72e1e17cb94c25f84c34 Mon Sep 17 00:00:00 2001
From: Simon Moll
Date: Thu, 17 Apr 2025 18:20:49 +0200
Subject: [PATCH 88/88] [SER] MaybeReorderThread + Make(Nop|Miss) HLSL -> DXIL lowering and tests (#7262)

- HLSL -> DXIL lowering
- ast, hlsl->dxil, dxilgen, and ScalarReplAggregatesHLSL tests

SER implementation tracker (#7214)
---
 include/dxc/HLSL/HLOperations.h | 4 +
 lib/HLSL/HLOperationLower.cpp | 116 ++++++++++++--
 .../Scalar/ScalarReplAggregatesHLSL.cpp | 8 +
tools/clang/lib/Sema/SemaHLSL.cpp | 1 + .../hlsl/intrinsics/maybereorder.hlsl | 37 +++++ .../objects/HitObject/hitobject_make.hlsl | 75 +++++++++ .../hlsl/objects/HitObject/lit.local.cfg | 1 + .../DXC/Passes/DxilGen/hitobject_dxilgen.ll | 5 +- .../Passes/DxilGen/maybereorder_dxilgen.ll | 3 +- .../hitobject_make_scalarrepl.ll | 142 ++++++++++++++++++ .../objects/HitObject/hitobject_make.hlsl | 12 -- .../objects/HitObject/hitobject_make_ast.hlsl | 24 --- .../hlsl/objects/HitObject/maybereorder.hlsl | 13 -- .../objects/HitObject/maybereorder_ast.hlsl | 28 ---- 14 files changed, 376 insertions(+), 93 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index f87d324baf..a7db8612a6 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -433,6 +433,10 @@ const unsigned kNodeHandleToResCastOpIdx = 1; const unsigned kAnnotateNodeHandleNodePropIdx = 2; const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; +// HitObject::MakeMiss +const unsigned kHitObjectMakeMiss_NumOp = 8; +const unsigned kHitObjectMakeMissRayDescOpIdx = 4; + } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index b5114fa34b..be45021e41 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -12,6 +12,7 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilConstants.h" #define _USE_MATH_DEFINES #include #include @@ -6183,19 +6184,114 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // Shader Execution Reordering. namespace { -Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *HlslOP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + if (Opcode == OP::OpCode::HitObject_MakeNop) { + Value *HitObject = TrivialDxilOperation( + Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); + Builder.CreateStore(HitObject, HitObjectPtr); + DXASSERT( + CI->use_empty(), + "Default ctor return type is a Clang artifact. 
Value must not be used"); + return nullptr; + } + + DXASSERT_NOMSG(CI->getNumArgOperands() == + HLOperandIndex::kHitObjectMakeMiss_NumOp); + Value *RayFlags = CI->getArgOperand(SrcIdx++); + Value *MissShaderIdx = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMissRayDescOpIdx); + Value *RayDescOrigin = CI->getArgOperand(SrcIdx++); + Value *RayDescOriginX = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)0); + Value *RayDescOriginY = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)1); + Value *RayDescOriginZ = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)2); + + Value *RayDescTMin = CI->getArgOperand(SrcIdx++); + Value *RayDescDirection = CI->getArgOperand(SrcIdx++); + Value *RayDescDirectionX = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)0); + Value *RayDescDirectionY = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)1); + Value *RayDescDirectionZ = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)2); + + Value *RayDescTMax = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + + Value *OutHitObject = TrivialDxilOperation( + Opcode, + {nullptr, RayFlags, MissShaderIdx, RayDescOriginX, RayDescOriginY, + RayDescOriginZ, RayDescTMin, RayDescDirectionX, RayDescDirectionY, + RayDescDirectionZ, RayDescTMax}, + Helper.voidTy, CI, HlslOP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, - OP::OpCode opcode, - HLOperationLowerHelper &helper, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return nullptr; // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + + // clang-format off + // Match MaybeReorderThread overload variants: + // void MaybeReorderThread(, + // HitObject Hit); + // void MaybeReorderThread(, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB ); + // void MaybeReorderThread(, + // HitObject Hit, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB); + // clang-format on + const unsigned NumHLArgs = CI->getNumArgOperands(); + DXASSERT_NOMSG(NumHLArgs >= 2); + + // Use a NOP HitObject for MaybeReorderThread without HitObject. 
+ Value *HitObject = nullptr; + unsigned HLIndex = 1; + if (3 == NumHLArgs) { + HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr}, + Type::getVoidTy(CI->getContext()), CI, OP); + } else { + Value *FirstParam = CI->getArgOperand(HLIndex); + DXASSERT_NOMSG(isa(FirstParam->getType())); + IRBuilder<> Builder(CI); + HitObject = Builder.CreateLoad(FirstParam); + HLIndex++; + } + + // If there are trailing parameters, these have to be the two coherence bit + // parameters + Value *CoherenceHint = nullptr; + Value *NumCoherenceHintBits = nullptr; + if (2 != NumHLArgs) { + DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs); + CoherenceHint = CI->getArgOperand(HLIndex++); + NumCoherenceHintBits = CI->getArgOperand(HLIndex++); + DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType()); + DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType()); + } else { + CoherenceHint = UndefValue::get(Helper.i32Ty); + NumCoherenceHintBits = OP->GetU32Const(0); + } + + TrivialDxilOperation( + OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits}, + Type::getVoidTy(CI->getContext()), CI, OP); + return nullptr; } Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, @@ -6968,11 +7064,9 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, - DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil - // opcode + DXIL::OpCode::HitObject_MakeNop}, {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, - DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil - // opcode + DXIL::OpCode::MaybeReorderThread}, {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index ec17fce9c8..e487079b94 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2775,6 +2775,14 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, /*bIn*/ true, /*bOut*/ true); } break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: { + if (OldVal == + CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + } + } break; case IntrinsicOp::MOP_TraceRayInline: { if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 230c7e65d9..418425a468 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -12066,6 +12066,7 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, case hlsl::IntrinsicOp::MOP_TraceRayInline: DiagnoseTraceRayInline(*this, CE); break; + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeMiss: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); break; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl new file mode 100644 index 0000000000..08836dfbaf --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl @@ 
-0,0 +1,37 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %[[NOP:[^ ]+]]) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %[[NOP]], i32 241, i32 3) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7) + +// DXIL: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP:[^ ]+]], i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 242, i32 7) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl new file mode 100644 index 0000000000..1e947b2296 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl @@ -0,0 +1,75 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition +// AST-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final +// AST-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' +// AST-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' +// AST-NEXT: | | | |-HLSLIntrinsicAttr 
{{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeMiss +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMissShaderIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRay +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeMiss 'TResult (TRayFlags, TMissShaderIndex, TRay) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'TRayFlags' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'TMissShaderIndex' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Ray 'TRay' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeMiss 'dx::HitObject (unsigned int, unsigned int, RayDesc)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MakeMiss 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 387 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: %{{[^ ]+}} = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 1, %struct.RayDesc* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 2, %struct.RayDesc* %{{[^ ]+}}) + +// Expect HitObject_Make* calls with identical parameters to be folded. 
+// DXIL: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NOT: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) +// DXIL-NOT: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1 +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 2, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + +void Use(in dx::HitObject hit) { + dx::MaybeReorderThread(hit); +} + +[shader("raygeneration")] +void main() { + dx::HitObject nop; + Use(nop); + + dx::HitObject nop2 = dx::HitObject::MakeNop(); + Use(nop2); + + RayDesc ray = {{0,0,0}, {0,0,1}, 0.05, 1000.0}; + dx::HitObject miss = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss); + + dx::HitObject miss2 = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss2); + + dx::HitObject miss3 = dx::HitObject::MakeMiss(0, 2, ray); + Use(miss3); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg new file mode 100644 index 0000000000..ba86568f9a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg @@ -0,0 +1 @@ +config.unsupported = 'dxil-1-9' not in config.available_features diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll index 01dafe5e86..17a968675f 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll @@ -1,9 +1,6 @@ ; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s ; REQUIRES: dxil-1-9 -; CHECK-NOT: @dx.op.hitObject_ -; CHECK-NOT: @dx.op.maybeReorderThread - ; ; Buffer Definitions: ; @@ -37,9 +34,11 @@ entry: %tmp = alloca %dx.types.HitObject, align 4 %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !24 ; line:10 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !24 ; line:10 col:3 %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !24 ; line:10 col:3 diff --git a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll index f5130bca3f..ca25b1e115 100644 --- 
a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll @@ -1,8 +1,6 @@ ; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s ; REQUIRES: dxil-1-9 -; CHECK-NOT: @dx.op.hitObject_ -; CHECK-NOT: @dx.op.maybeReorderThread ; ; Buffer Definitions: @@ -36,6 +34,7 @@ entry: %hit = alloca %dx.types.HitObject, align 4 %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %hit), !dbg !24 ; line:10 col:3 call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %hit, i32 241, i32 3), !dbg !25 ; line:11 col:3 diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll new file mode 100644 index 0000000000..89ee886c2e --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll @@ -0,0 +1,142 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %ray = alloca %struct.RayDesc, align 4 +; CHECK-NOT: %{{[^ ]+}} = alloca %struct.RayDesc + %tmp2 = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT0:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT1:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT2:[^ ]+]] = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !23 ; line:42 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !23 ; line:42 col:3 +; CHECK: %[[THIS0:[^ ]+]] = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT0]]) +; CHECK-NOT: %[[THIS0]] + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !27 ; line:42 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !28 ; line:43 col:3 +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT1]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !28 ; line:43 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 
col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !28 ; line:43 col:3 + %4 = bitcast %struct.RayDesc* %ray to i8*, !dbg !29 ; line:44 col:3 + call void @llvm.lifetime.start(i64 32, i8* %4) #0, !dbg !29 ; line:44 col:3 + %5 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 0, !dbg !30 ; line:44 col:17 + store <3 x float> zeroinitializer, <3 x float>* %5, !dbg !30 ; line:44 col:17 + %6 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 1, !dbg !30 ; line:44 col:17 + store float 0.000000e+00, float* %6, !dbg !30 ; line:44 col:17 + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 2, !dbg !30 ; line:44 col:17 + store <3 x float> , <3 x float>* %7, !dbg !30 ; line:44 col:17 + %8 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 3, !dbg !30 ; line:44 col:17 + store float 1.000000e+03, float* %8, !dbg !30 ; line:44 col:17 + %9 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.start(i64 4, i8* %9) #0, !dbg !31 ; line:45 col:3 +; CHECK: store <3 x float> zeroinitializer, <3 x float>* %[[pRDO:[^ ]+]], +; CHECK: store float 0.000000e+00, float* %[[pRDTMIN:[^ ]+]], +; CHECK: store <3 x float> , <3 x float>* %[[pRDD:[^ ]+]], +; CHECK: store float 1.000000e+03, float* %[[pRDTMAX:[^ ]+]], +; CHECK-DAG: %[[RDO:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDO]], +; CHECK-DAG: %[[RDTMIN:[^ ]+]] = load float, float* %[[pRDTMIN]], +; CHECK-DAG: %[[RDD:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD]], +; CHECK-DAG: %[[RDTMAX:[^ ]+]] = load float, float* %[[pRDTMAX]], +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO]], float %[[RDTMIN]], <3 x float> %[[RDD]], float %[[RDTMAX]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %tmp2, i32 0, i32 1, %struct.RayDesc* %ray), !dbg !31 ; line:45 col:3 + %10 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.end(i64 4, i8* %10) #0, !dbg !31 ; line:45 col:3 + %11 = bitcast %struct.RayDesc* %ray to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 32, i8* %11) #0, !dbg !32 ; line:46 col:1 + %12 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 4, i8* %12) #0, !dbg !32 ; line:46 col:1 + ret void, !dbg !32 ; line:46 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !12} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, 
i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.dx::HitObject" undef, !5, %struct.RayDesc undef, !7} +!5 = !{i32 4, !6} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 1, void ()* @"\01?main@@YAXXZ", !13} +!13 = !{!14} +!14 = !{i32 1, !15, !15} +!15 = !{} +!16 = !{null, !"", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!20 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!21 = !{i32 -2147483584} +!22 = !{i32 -1} +!23 = !DILocation(line: 42, column: 3, scope: !24) +!24 = !DISubprogram(name: "main", scope: !25, file: !25, line: 41, type: !26, isLocal: false, isDefinition: true, scopeLine: 41, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!25 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl", directory: "") +!26 = !DISubroutineType(types: !15) +!27 = !DILocation(line: 42, column: 17, scope: !24) +!28 = !DILocation(line: 43, column: 3, scope: !24) +!29 = !DILocation(line: 44, column: 3, scope: !24) +!30 = !DILocation(line: 44, column: 17, scope: !24) +!31 = !DILocation(line: 45, column: 3, scope: !24) +!32 = !DILocation(line: 46, column: 1, scope: !24) \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl deleted file mode 100644 index 4e09b770ec..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s -// REQUIRES: dxil-1-9 - -// TODO: Implement lowering for dx::HitObject::MakeNop - -// CHECK-NOT: call - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::HitObject::MakeNop(); -} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl deleted file mode 100644 index fd2fbc5974..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition -// CHECK-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final -// CHECK-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" -// CHECK-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' -// CHECK-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' -// CHECK-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 -// CHECK-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit - -// CHECK: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop -// CHECK-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult -// CHECK-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static -// CHECK-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static -// CHECK-NEXT: | | | |-TemplateArgument type 'dx::HitObject' -// CHECK-NEXT: | | | 
|-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 -// CHECK-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::HitObject::MakeNop(); -} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl deleted file mode 100644 index 8824cffaec..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s - -// TODO: Implement lowering for dx::MaybeReorderThread - -// CHECK-NOT: call - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::MaybeReorderThread(hit); - dx::MaybeReorderThread(hit, 0xf1, 3); - dx::MaybeReorderThread(0xf2, 7); -} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl deleted file mode 100644 index d570ef021f..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' -// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 -// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - -// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' -// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 -// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - -// CHECK: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern -// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' -// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' -// CHECK-NEXT: |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 -// CHECK-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::MaybeReorderThread(hit); - dx::MaybeReorderThread(hit, 0xf1, 3); - dx::MaybeReorderThread(0xf2, 7); -} \ No newline at end of file