From 5304402f05369d7205b1f028e3d6ba574979a035 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 21 Nov 2024 13:03:53 -0700 Subject: [PATCH 01/88] [SM6.9] Allow native vectors longer than 4 Remove errors in Sema diagnostics for vectors longer than 4 in 6.9. Test for failures using long vectors in unspported contexts and for correct codegen in supported contexts. Verify errors persist in pre-6.9 shader models The type buffer cache expects a max vector size of 4. By just skipping the cache for longer vectors, we don't overrun and store float7 vectors in the double3 slot or retrieve the double3 in place of float7. Testing is for acceptance, mangling and basic copying that takes place at the high level to ensure they are being accepted and recognized correctly. The intent is not to tully test the passing of data as that requires enabling vector operations to do properly. This test is used to verify that these same constructs are disallowed in 6.8 and earlier. A separate test verifies that disallowed contexts produce the appropriate errors Fixes #7117 --- tools/clang/lib/Sema/SemaHLSL.cpp | 12 +- .../CodeGenDXIL/hlsl/types/longvec_decls.hlsl | 263 ++++++++++++++++++ .../hlsl/types/invalid_longvecs_sm68.hlsl | 34 +++ 3 files changed, 306 insertions(+), 3 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ba0801dd52..69cd2a88e3 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -3928,7 +3928,9 @@ class HLSLExternalSource : public ExternalSemaSource { } QualType LookupVectorType(HLSLScalarType scalarType, unsigned int colCount) { - QualType qt = m_vectorTypes[scalarType][colCount - 1]; + QualType qt; + if (colCount < 4) + qt = m_vectorTypes[scalarType][colCount - 1]; if (qt.isNull()) { if (m_scalarTypes[scalarType].isNull()) { LookupScalarTypeDef(scalarType); @@ -3936,7 +3938,8 @@ class HLSLExternalSource : public ExternalSemaSource { qt = GetOrCreateVectorSpecialization(*m_context, m_sema, m_vectorTemplateDecl, m_scalarTypes[scalarType], colCount); - m_vectorTypes[scalarType][colCount - 1] = qt; + if (colCount < 4) + m_vectorTypes[scalarType][colCount - 1] = qt; } return qt; } @@ -5055,7 +5058,10 @@ class HLSLExternalSource : public ExternalSemaSource { bool CheckRangedTemplateArgument(SourceLocation diagLoc, llvm::APSInt &sintValue) { - if (!sintValue.isStrictlyPositive() || sintValue.getLimitedValue() > 4) { + const auto *SM = + hlsl::ShaderModel::GetByName(m_sema->getLangOpts().HLSLProfile.c_str()); + if (!sintValue.isStrictlyPositive() || + (sintValue.getLimitedValue() > 4 && !SM->IsSM69Plus())) { m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_4); return true; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl new file mode 100644 index 0000000000..d6672e7678 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl @@ -0,0 +1,263 @@ +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=7 %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 
-DTYPE=int16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s + +// A test to verify that declarations of longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } +struct LongVec { + float4 f; + vector vec; +}; + + +// Just some dummies to capture the types and mangles. +// CHECK: @"\01?dummy@@3[[MNG:F|M|N|_N|_K|\$f16@]]A" = external addrspace(3) global [[STY]] +groupshared TYPE dummy; + +// CHECK-DAG: @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" = external addrspace(3) global <7 x [[STY]]> +// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$06@@A" = external addrspace(3) global [10 x <7 x [[STY]]>] +// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec +groupshared vector gs_vec; +groupshared vector gs_vec_arr[10]; +groupshared LongVec gs_vec_rec; + +// CHECK-DAG: @static_vec = internal global <7 x [[STY]]> +// CHECK-DAG: @static_vec_arr = internal global [10 x <7 x [[STY]]>] zeroinitializer +// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec +static vector static_vec; +static vector static_vec_arr[10]; +static LongVec static_vec_rec; + +// CHECK: define [[RTY:[a-z0-9]*]] @"\01?getVal@@YA[[MNG]][[MNG]]@Z"([[RTY]] {{.*}}%t) +export TYPE getVal(TYPE t) {TYPE ret = dummy; dummy = t; return ret;} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_param_passthru +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@V1@@Z"(<7 x [[RTY]]> %vec1) +// CHECK: ret <7 x [[RTY]]> +export vector lv_param_passthru(vector vec1) { + vector ret = vec1; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@AIAV1@@Z"(<7 x [[RTY]]> %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_in_out(in vector vec1, out vector vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout +// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$06@@0@Z"(<7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec1, align 4 +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec2, align 4 +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec1, align 4 +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_inout(inout vector vec1, inout vector vec2) { + vector tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_global_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* @static_vec +// CHECK: ret void +export void lv_global_assign(vector vec) { + static_vec = vec; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_global_ret +// 
CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* @static_vec +// CHECK: ret <7 x [[RTY]]> +export vector lv_global_ret() { + vector ret = static_vec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_gs_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) +// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" +// CHECK: ret void +export void lv_gs_assign(vector vec) { + gs_vec = vec; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_gs_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() +// CHECK: load <7 x [[STY]]>, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" +// CHECK: ret <7 x [[RTY]]> +export vector lv_gs_ret() { + vector ret = gs_vec; + return ret; +} + +#define DIMS 10 + +// CHECK-LABEL: define void @"\01?lv_param_arr_passthru +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@V1@@Z"([10 x <7 x [[STY]]>]* noalias sret %agg.result, <7 x [[RTY]]> %vec) +// Arrays are returned in the params +// CHECK: ret void +export vector lv_param_arr_passthru(vector vec)[10] { + vector ret[10]; + for (int i = 0; i < DIMS; i++) + ret[i] = vec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_global_arr_assign +// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) +// CHECK: ret void +export void lv_global_arr_assign(vector vec[10]) { + for (int i = 0; i < DIMS; i++) + static_vec_arr[i] = vec[i]; +} + +// CHECK-LABEL: define void @"\01?lv_global_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) +// Arrays are returned in the params +// CHECK: ret void +export vector lv_global_arr_ret()[10] { + vector ret[10]; + for (int i = 0; i < DIMS; i++) + ret[i] = static_vec_arr[i]; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_assign +// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) +// ret void +export void lv_gs_arr_assign(vector vec[10]) { + for (int i = 0; i < DIMS; i++) + gs_vec_arr[i] = vec[i]; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) +export vector lv_gs_arr_ret()[10] { + vector ret[10]; + for (int i = 0; i < DIMS; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// Aggregates are returned in the params +// CHECK: ret void +export LongVec lv_param_rec_passthru(LongVec vec) { + LongVec ret = vec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_global_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export void lv_global_rec_assign(LongVec vec) { + static_vec_rec = vec; +} + +// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// Aggregates are returned in the params +// CHECK: ret void +export LongVec lv_global_rec_ret() { + LongVec ret = static_vec_rec; + return ret; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export void lv_gs_rec_assign(LongVec vec) { + gs_vec_rec = vec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// Aggregates are 
returned in the params +// CHECK: ret void +export LongVec lv_gs_rec_ret() { + LongVec ret = gs_vec_rec; + return ret; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_splat +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) +// CHECK: ret <7 x [[RTY]]> +export vector lv_splat(TYPE scalar) { + vector ret = scalar; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@XZ"() +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist() { + vector ret = {1, 2, 3, 4, 5, 6}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@@Z"(<3 x [[RTY]]> %vec) +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist_vec(vector vec) { + vector ret = {vec, 4.0, 5.0, 6.0}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_vec_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@0@Z"(<3 x [[RTY]]> %vec1, <3 x [[RTY]]> %vec2) +// CHECK: ret <6 x [[RTY]]> +export vector lv_vec_vec(vector vec1, vector vec2) { + vector ret = {vec1, vec2}; + return ret; +} + +// CHECK: define <7 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_array_cast +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@Y06[[MNG]]@Z"([7 x [[STY]]]* %arr) +// CHECK: ret <7 x [[RTY]]> +export vector lv_array_cast(TYPE arr[NUM]) { + vector ret = (vector)arr; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_ctor +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@[[MNG]]@Z"([[RTY]] {{.*}}%s) +// CHECK: ret <6 x [[RTY]]> +export vector lv_ctor(TYPE s) { + vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); + return ret; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl new file mode 100644 index 0000000000..42eb6b077c --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc -T ps_6_8 -verify %s + +#define TYPE float +#define NUM 5 + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +}; +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +export vector lv_param_passthru( // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector vec1) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + arr[1]= vec1; + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 
and 4 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec1 = vec2; + vec2 = tmp; +} From e010223f74f9c2d96d51349849cd5f5e99542e99 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 5 Dec 2024 10:55:40 -1000 Subject: [PATCH 02/88] Produce errors for long vectors in invalid contexts Disallow long vectors, and arrays or structs containing long vectors in cbuffers, entry functions, node records, tessellation patchs, or special intrinsic parameters with user-defined struct parameters. --- tools/clang/include/clang/AST/HlslTypes.h | 6 +- tools/clang/include/clang/Basic/Attr.td | 12 ++ .../clang/Basic/DiagnosticSemaKinds.td | 4 +- tools/clang/include/clang/Sema/SemaHLSL.h | 2 + tools/clang/lib/AST/ASTContextHLSL.cpp | 19 ++- tools/clang/lib/Sema/SemaDXR.cpp | 6 + tools/clang/lib/Sema/SemaHLSL.cpp | 127 +++++++++++++++-- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 10 ++ .../hlsl/types/invalid_longvec_decls.hlsl | 132 ++++++++++++++++++ .../hlsl/types/invalid_longvec_decls_68.hlsl | 108 ++++++++++++++ .../hlsl/types/invalid_longvec_decls_hs.hlsl | 24 ++++ 11 files changed, 426 insertions(+), 24 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index d11fd598e6..2aa9afa5f9 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -370,12 +370,14 @@ void AddStdIsEqualImplementation(clang::ASTContext &context, clang::Sema &sema); clang::CXXRecordDecl *DeclareTemplateTypeWithHandle( clang::ASTContext &context, llvm::StringRef name, uint8_t templateArgCount = 1, - clang::TypeSourceInfo *defaultTypeArgValue = nullptr); + clang::TypeSourceInfo *defaultTypeArgValue = nullptr, + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef name, uint8_t templateArgCount, - clang::TypeSourceInfo *defaultTypeArgValue); + clang::TypeSourceInfo *defaultTypeArgValue, + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 6d2295dc4a..5ca9d4b333 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -992,6 +992,18 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLCBuffer : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLTessPatch : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + def HLSLNodeObject : InheritableAttr { let Spellings = []; // No spellings! 
let Subjects = SubjectList<[CXXRecord]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index f79b8f6045..c85f6a6863 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7691,8 +7691,6 @@ def err_hlsl_control_flow_cond_not_scalar : Error< "%0 statement conditional expressions must evaluate to a scalar">; def err_hlsl_unsupportedvectortype : Error< "%0 is declared with type %1, but only primitive scalar values are supported">; -def err_hlsl_unsupportedvectorsize : Error< - "%0 is declared with size %1, but only values 1 through 4 are supported">; def err_hlsl_unsupportedmatrixsize : Error< "%0 is declared with size %1x%2, but only values 1 through 4 are supported">; def err_hlsl_norm_float_only : Error< @@ -7843,6 +7841,8 @@ def err_hlsl_load_from_mesh_out_arrays: Error< "output arrays of a mesh shader can not be read from">; def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; +def err_hlsl_unsupported_long_vector: Error< + "Vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 40b030b430..c52131b8a5 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,6 +128,8 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); +bool HasLongVecs(const clang::QualType &qt); + bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 3c058950e0..978c97aeb5 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -903,18 +903,19 @@ void hlsl::AddStdIsEqualImplementation(clang::ASTContext &context, /// Number of template arguments (one or /// two). If assigned, the default /// argument for the element template. 
-CXXRecordDecl * -hlsl::DeclareTemplateTypeWithHandle(ASTContext &context, StringRef name, - uint8_t templateArgCount, - TypeSourceInfo *defaultTypeArgValue) { +CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandle( + ASTContext &context, StringRef name, uint8_t templateArgCount, + TypeSourceInfo *defaultTypeArgValue, InheritableAttr *Attr) { return DeclareTemplateTypeWithHandleInDeclContext( context, context.getTranslationUnitDecl(), name, templateArgCount, - defaultTypeArgValue); + defaultTypeArgValue, Attr); } CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef name, - uint8_t templateArgCount, TypeSourceInfo *defaultTypeArgValue) { + uint8_t templateArgCount, TypeSourceInfo *defaultTypeArgValue, + InheritableAttr *Attr) { + DXASSERT(templateArgCount != 0, "otherwise caller should be creating a class or struct"); DXASSERT(templateArgCount <= 2, "otherwise the function needs to be updated " @@ -968,6 +969,9 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( typeDeclBuilder.addField("h", elementType); + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + return typeDeclBuilder.getRecordDecl(); } @@ -1131,6 +1135,9 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLCBufferAttr::CreateImplicit(context)); + typeDeclBuilder.getRecordDecl(); return templateRecordDecl; diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 6d838fb203..cb16ced5df 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,6 +810,12 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } + if (hlsl::HasLongVecs(Payload->getType())) { + S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "payload parameters"; + return; + } + CollectNonAccessableFields(PayloadType, CallerStage, {}, {}, NonWriteableFields, NonReadableFields); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 69cd2a88e3..c5a30e00fa 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -3733,10 +3733,14 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); + InheritableAttr *Attr = nullptr; + if (kind == AR_OBJECT_INPUTPATCH || kind == AR_OBJECT_OUTPUTPATCH) + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context); + TypeSourceInfo *typeDefault = TemplateHasDefaultType(kind) ? float4TypeSourceInfo : nullptr; recordDecl = DeclareTemplateTypeWithHandle( - *m_context, typeName, templateArgCount, typeDefault); + *m_context, typeName, templateArgCount, typeDefault, Attr); } m_objectTypeDecls[i] = recordDecl; m_objectTypeDeclsMap[i] = std::make_pair(recordDecl, i); @@ -4896,10 +4900,6 @@ class HLSLExternalSource : public ExternalSemaSource { AR_BASIC_UNKNOWN; } - /// Checks whether the specified value is a valid vector - /// size. - bool IsValidVectorSize(size_t length) { return 1 <= length && length <= 4; } - /// Checks whether the specified value is a valid matrix row or /// column size. 
bool IsValidMatrixColOrRowSize(size_t length) { @@ -4935,11 +4935,6 @@ class HLSLExternalSource : public ExternalSemaSource { false); } else if (objectKind == AR_TOBJ_VECTOR) { bool valid = true; - if (!IsValidVectorSize(GetHLSLVecSize(type))) { - valid = false; - m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectorsize) - << type << GetHLSLVecSize(type); - } if (!IsScalarType(GetMatrixOrVectorElementType(type))) { valid = false; m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectortype) @@ -5085,11 +5080,12 @@ class HLSLExternalSource : public ExternalSemaSource { return false; } // Allow object type for Constant/TextureBuffer. - if (templateName == "ConstantBuffer" || templateName == "TextureBuffer") { + if (Template->getTemplatedDecl()->hasAttr()) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); - DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "cbuffer with non-type template arg"); QualType argType = arg.getAsType(); SourceLocation argSrcLoc = argLoc.getLocation(); if (IsScalarType(argType) || IsVectorType(m_sema, argType) || @@ -5099,6 +5095,12 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } + if (HasLongVecs(argType)) { + m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) + << "cbuffers"; + return true; + } + if (auto *TST = dyn_cast(argType)) { // This is a bit of a special case we need to handle. Because the // buffer types don't use their template parameter in a way that would @@ -5182,8 +5184,20 @@ class HLSLExternalSource : public ExternalSemaSource { return true; } return false; + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() == 1, + "Tessellation patch has more than one template arg"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + QualType argType = arg.getAsType(); + if (HasLongVecs(argType)) { + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "tessellation patches"; + return true; + } } - bool isMatrix = Template->getCanonicalDecl() == m_matrixTemplateDecl->getCanonicalDecl(); bool isVector = Template->getCanonicalDecl() == @@ -11423,10 +11437,17 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, HLSLExternalSource *source = HLSLExternalSource::FromSema(self); ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); switch (shapeKind) { + case AR_TOBJ_VECTOR: + if (GetHLSLVecSize(ArgTy) > 4) { + self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) + << "node records"; + Empty = false; + return false; + } + LLVM_FALLTHROUGH; case AR_TOBJ_ARRAY: case AR_TOBJ_BASIC: case AR_TOBJ_MATRIX: - case AR_TOBJ_VECTOR: Empty = false; return false; case AR_TOBJ_OBJECT: @@ -11888,6 +11909,33 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } +bool hlsl::HasLongVecs(const QualType &qt) { + if (qt.isNull()) { + return false; + } + + if (IsHLSLVecType(qt)) { + if (GetHLSLVecSize(qt) > 4) + return true; + } else if (qt->isArrayType()) { + const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); + return HasLongVecs(arrayType->getElementType()); + } else if (qt->isStructureOrClassType()) { + const RecordType *recordType = qt->getAs(); + const RecordDecl *recordDecl = 
recordType->getDecl(); + if (recordDecl->isInvalidDecl()) + return false; + RecordDecl::field_iterator begin = recordDecl->field_begin(); + RecordDecl::field_iterator end = recordDecl->field_end(); + for (; begin != end; begin++) { + const FieldDecl *fieldDecl = *begin; + if (HasLongVecs(fieldDecl->getType())) + return true; + } + } + return false; +} + bool hlsl::IsConversionToLessOrEqualElements( clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, bool explicitConversion) { @@ -14211,6 +14259,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, *pDispatchGrid = nullptr, *pMaxDispatchGrid = nullptr; bool usageIn = false; bool usageOut = false; + bool isGroupShared = false; for (clang::AttributeList *pAttr = D.getDeclSpec().getAttributes().getList(); pAttr != NULL; pAttr = pAttr->getNext()) { @@ -14234,6 +14283,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGroupShared: + isGroupShared = true; if (!isGlobal) { Diag(pAttr->getLoc(), diag::err_hlsl_varmodifierna) << pAttr->getName() << declarationType << pAttr->getRange(); @@ -14514,6 +14564,12 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } + // Disallow long vecs from cbuffers. + if (isGlobal && !isStatic && !isGroupShared && HasLongVecs(qt)) { + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << "cbuffers"; + result = false; + } + // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN // Validate that Vulkan specific feature is only used when targeting SPIR-V @@ -15402,6 +15458,16 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { return false; } +// Verify that user-defined intrinsic struct args contain no long vectors +static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { + if (HasLongVecs(Arg->getType())) { + S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) + << "user-defined struct parameter"; + return true; + } + return false; +} + static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 0); @@ -15419,6 +15485,12 @@ static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { auto attr = FDecl->getAttr(); + if (!attr) + return false; + + if (!IsBuiltinTable(attr->getGroup())) + return false; + switch (hlsl::IntrinsicOp(attr->getOpcode())) { case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want @@ -15430,6 +15502,22 @@ bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { // existing ones. See the ExtensionTest.EvalAttributeCollision test. 
assert(FDecl->getName() == "GetAttributeAtVertex"); return CheckIntrinsicGetAttributeAtVertex(this, FDecl, TheCall); + case hlsl::IntrinsicOp::IOP_DispatchMesh: + assert(TheCall->getNumArgs() > 3); + assert(FDecl->getName() == "DispatchMesh"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(3)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_CallShader: + assert(TheCall->getNumArgs() > 1); + assert(FDecl->getName() == "CallShader"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(1)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_TraceRay: + assert(TheCall->getNumArgs() > 7); + assert(FDecl->getName() == "TraceRay"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(7)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_ReportHit: + assert(TheCall->getNumArgs() > 2); + assert(FDecl->getName() == "ReportHit"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(2)->IgnoreCasts()); default: break; } @@ -16110,6 +16198,17 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { return; } + // Check general parameter characteristics + // Would be nice to check for resources here as they crash the compiler now. + for (const auto *param : FD->params()) + if (HasLongVecs(param->getType())) + S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "entry function parameters"; + + if (HasLongVecs(FD->getReturnType())) + S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "entry function return type"; + DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); llvm::StringRef StageName = shaderAttr->getStage(); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index cf5d741541..ee5ea567ce 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -520,6 +520,16 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << hullPatchCount.value(); } } + for (const auto *param : pPatchFnDecl->params()) + if (HasLongVecs(param->getType())) + self->Diag(param->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "patch constant function parameters"; + + if (HasLongVecs(pPatchFnDecl->getReturnType())) + self->Diag(pPatchFnDecl->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "patch constant function return type"; } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl new file mode 100644 index 0000000000..ae52983772 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl @@ -0,0 +1,132 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_9 -verify %s + +struct [raypayload] LongVec { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +struct LongVecParm { + float f; + float4 tar2 : SV_Target2; + vector vec; +}; + +vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +LongVec global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +cbuffer BadBuffy { + vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + LongVec cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are 
not supported}} +}; + +tbuffer BadTuffy { + vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + LongVec tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +}; + +ConstantBuffer< LongVec > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TextureBuffer< LongVec > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} + vector vec : V, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + LongVecParm parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + parm.f = vec; // expected-warning {{implicit truncation of vector type}} + parm.tar2 = vec; // expected-warning {{implicit truncation of vector type}} + return vec; // expected-warning {{implicit truncation of vector type}} +} + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + +void PatchConstantFunction(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + +RaytracingAccelerationStructure RTAS; + +[shader("raygeneration")] +void raygen() { + LongVec p = (LongVec)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("closesthit")] +void closesthit(inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in LongVec attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("anyhit")] +void AnyHit( inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in LongVec attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +{ +} + +[shader("miss")] +void Miss(inout LongVec payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{Vectors of over 4 elements in 
user-defined struct parameter are not supported}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + LongVec attr = (LongVec)0; + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("callable")] +void callable1(inout LongVec p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +groupshared LongVec as_pld; + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + DispatchMesh(1,1,1,as_pld); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +struct LongVecRec { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are not supported}} + NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} +{ + ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl new file mode 100644 index 0000000000..8aac527c1f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl @@ -0,0 +1,108 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_8 -verify %s + +// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } +struct LongVec { + float4 f; + vector vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} +}; + +static vector static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} +static vector static_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + +groupshared vector gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + +export vector lv_param_passthru(vector vec1) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export void lv_param_in_out(in vector vec1, out vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, inout vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error 
{{invalid value, valid range is between 1 and 4 inclusive}} + vector tmp = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vec1 = vec2; + vec2 = tmp; +} + +export void lv_global_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + static_vec = vec; +} + +export vector lv_global_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export void lv_gs_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + gs_vec = vec; +} + +export vector lv_gs_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_param_arr_passthru(vector vec)[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + ret[i] = vec; + return ret; +} + +export void lv_global_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + static_vec_arr[i] = vec[i]; +} + +export vector lv_global_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + ret[i] = static_vec_arr[i]; + return ret; +} + +export void lv_gs_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + gs_vec_arr[i] = vec[i]; +} + +export vector lv_gs_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + for (int i = 0; i < 10; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +export vector lv_splat(TYPE scalar) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = scalar; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_initlist() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = {1, 2, 3, 4, 5, 6}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_initlist_vec(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = {vec, 4.0, 5.0, 6.0}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_vec_vec(vector vec1, vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = {vec1, vec2}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_array_cast(TYPE arr[NUM]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = (vector)arr; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + +export vector lv_ctor(TYPE s) { // 
expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} + return ret; +} + diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl new file mode 100644 index 0000000000..185233ad0f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T hs_6_9 -verify %s + +struct HsConstantData { + float Edges[3] : SV_TessFactor; + vector vec; +}; + +struct LongVec { + float4 f; + vector vec; +}; + +HsConstantData PatchConstantFunction( // expected-error{{Vectors of over 4 elements in patch constant function return type are not supported}} + vector vec : V, // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} + LongVec lv : L) { // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} + return (HsConstantData)0; +} + +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void main() { +} From cd72abec4341d7de07fbd7f7807f145b0960134a Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 18 Feb 2025 11:17:17 -0700 Subject: [PATCH 03/88] fix assert for tesselation patch template args This got lost somewhere --- tools/clang/lib/Sema/SemaHLSL.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index c5a30e00fa..aea960f2e8 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5185,8 +5185,8 @@ class HLSLExternalSource : public ExternalSemaSource { } return false; } else if (Template->getTemplatedDecl()->hasAttr()) { - DXASSERT(TemplateArgList.size() == 1, - "Tessellation patch has more than one template arg"); + DXASSERT(TemplateArgList.size() > 0, + "Tessellation patch should have at least one template args"); const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); From 1f12a3f08fd896ee005170c8ff7025f3de204950 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 22:33:24 -0700 Subject: [PATCH 04/88] Refactor builtin type detection with attributes Expand resource attribute to all resource types by adding reskind and resclass arguments indicating the specific resource type. Change detection in HlslTypes to use these attribute arguments. Similarly add vertex number arguments to output stream attribute and a boolean indicator of input or output for tessellation patches. 
Add geomstream attr to detect those objects Use attribute to detect tesselation patches Removes template arg counts and startswith stirngs to identify tesslations patches and distinguish them from multisampled textures --- tools/clang/include/clang/AST/HlslTypes.h | 7 +- tools/clang/include/clang/Basic/Attr.td | 6 +- tools/clang/lib/AST/ASTContextHLSL.cpp | 28 ++-- tools/clang/lib/AST/HlslTypes.cpp | 190 +++++----------------- tools/clang/lib/Sema/SemaHLSL.cpp | 68 ++++++-- 5 files changed, 121 insertions(+), 178 deletions(-) diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 2aa9afa5f9..9aeb97d3ee 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -350,7 +350,8 @@ void AddHLSLNodeOutputRecordTemplate( clang::CXXRecordDecl *DeclareRecordTypeWithHandle(clang::ASTContext &context, llvm::StringRef name, - bool isCompleteType = true); + bool isCompleteType = true, + clang::InheritableAttr *Attr = nullptr); void AddRaytracingConstants(clang::ASTContext &context); void AddSamplerFeedbackConstants(clang::ASTContext &context); @@ -382,11 +383,11 @@ clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); + clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 7304bba06e..e344e7b851 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -993,14 +993,16 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { } -def HLSLCBuffer : InheritableAttr { +def HLSLTessPatch : InheritableAttr { let Spellings = []; // No spellings! + let Args = [BoolArgument<"IsInput">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; } -def HLSLTessPatch : InheritableAttr { +def HLSLStreamOutput : InheritableAttr { let Spellings = []; // No spellings! 
+ let Args = [UnsignedArgument<"Vertices">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; } diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 978c97aeb5..e71f37b663 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -525,11 +525,15 @@ hlsl::DeclareRecordTypeWithHandleAndNoMemberFunctions(ASTContext &context, /// CXXRecordDecl * hlsl::DeclareRecordTypeWithHandle(ASTContext &context, StringRef name, - bool isCompleteType /*= true */) { + bool isCompleteType /*= true */, + InheritableAttr *Attr) { BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), name, TagDecl::TagKind::TTK_Struct); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + if (isCompleteType) return typeDeclBuilder.completeDefinition(); return typeDeclBuilder.getRecordDecl(); @@ -939,11 +943,9 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( QualType elementType = context.getTemplateTypeParmType( /*templateDepth*/ 0, 0, ParameterPackFalse, elementTemplateParamDecl); - if (templateArgCount > 1 && - // Only need array type for inputpatch and outputpatch. - // Avoid Texture2DMS which may use 0 count. - // TODO: use hlsl types to do the check. - !name.startswith("Texture") && !name.startswith("RWTexture")) { + // Only need array type for inputpatch and outputpatch. + if (Attr && isa(Attr)) { + DXASSERT(templateArgCount == 2, "Tess patches need 2 template params"); Expr *countExpr = DeclRefExpr::Create( context, NestedNameSpecifierLoc(), NoLoc, countTemplateParamDecl, false, DeclarationNameInfo(countTemplateParamDecl->getDeclName(), NoLoc), @@ -1099,22 +1101,25 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandle( ASTContext &context, StringRef typeName, StringRef templateParamName, - TagTypeKind tagKind) { + InheritableAttr *Attr) { return DeclareUIntTemplatedTypeWithHandleInDeclContext( context, context.getTranslationUnitDecl(), typeName, templateParamName, - tagKind); + Attr); } CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, - StringRef templateParamName, TagTypeKind tagKind) { + StringRef templateParamName, InheritableAttr *Attr) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, tagKind); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, TagTypeKind::TTK_Class); typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + return typeDeclBuilder.getRecordDecl(); } @@ -1136,7 +1141,8 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
typeDeclBuilder.getRecordDecl()->addAttr( - HLSLCBufferAttr::CreateImplicit(context)); + HLSLResourceAttr::CreateImplicit(context, (unsigned)DXIL::ResourceKind::CBuffer, + (unsigned)DXIL::ResourceClass::CBuffer)); typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index d83b307463..5f7e93fbee 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -474,160 +474,73 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { QualType elemTy = arg0.getAsType(); return elemTy; } + + +template +static AttrType *getAttr(clang::QualType type) { + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr(); + } + return nullptr; +} + // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "InputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return Attr->getIsInput(); return false; } + bool IsHLSLOutputPatchType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "OutputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return !Attr->getIsInput(); return false; } + bool IsHLSLPointStreamType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getVertices() == 1; return false; } + bool IsHLSLLineStreamType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "LineStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getVertices() == 2; return false; } + bool IsHLSLTriangleStreamType(QualType type) { type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getVertices() == 3; return false; } + bool IsHLSLStreamOutputType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - if (templateDecl->getName() == "LineStream") - return true; - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (getAttr(type)) + return true; return false; } 
-bool IsHLSLResourceType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "Texture1D" || name == "RWTexture1D") - return true; - if (name == "Texture2D" || name == "RWTexture2D") - return true; - if (name == "Texture2DMS" || name == "RWTexture2DMS") - return true; - if (name == "Texture3D" || name == "RWTexture3D") - return true; - if (name == "TextureCube" || name == "RWTextureCube") - return true; - - if (name == "Texture1DArray" || name == "RWTexture1DArray") - return true; - if (name == "Texture2DArray" || name == "RWTexture2DArray") - return true; - if (name == "Texture2DMSArray" || name == "RWTexture2DMSArray") - return true; - if (name == "TextureCubeArray" || name == "RWTextureCubeArray") - return true; - - if (name == "FeedbackTexture2D" || name == "FeedbackTexture2DArray") - return true; - - if (name == "RasterizerOrderedTexture1D" || - name == "RasterizerOrderedTexture2D" || - name == "RasterizerOrderedTexture3D" || - name == "RasterizerOrderedTexture1DArray" || - name == "RasterizerOrderedTexture2DArray" || - name == "RasterizerOrderedBuffer" || - name == "RasterizerOrderedByteAddressBuffer" || - name == "RasterizerOrderedStructuredBuffer") - return true; - - if (name == "ByteAddressBuffer" || name == "RWByteAddressBuffer") - return true; - - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - - if (name == "Buffer" || name == "RWBuffer") - return true; - - if (name == "SamplerState" || name == "SamplerComparisonState") - return true; - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - - if (name == "RaytracingAccelerationStructure") - return true; - } +bool IsHLSLResourceType(clang::QualType type) { + if (getAttr(type)) + return true; return false; } -static HLSLNodeObjectAttr *getNodeAttr(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr(); - } - return nullptr; -} - DXIL::NodeIOKind GetNodeIOType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return Attr->getNodeIOType(); return DXIL::NodeIOKind::Invalid; } @@ -654,27 +567,20 @@ bool IsHLSLDynamicSamplerType(clang::QualType type) { } bool IsHLSLNodeType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return true; return false; } bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer; return false; } bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - // Read-only records - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResClass() == 
(unsigned)DXIL::ResourceClass::CBuffer; return false; } @@ -701,14 +607,8 @@ bool IsHLSLNodeOutputType(clang::QualType type) { } bool IsHLSLStructuredBufferType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResKind() == (unsigned)DXIL::ResourceKind::StructuredBuffer; return false; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 0665d7441e..57eb388893 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -363,6 +363,8 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) + #define IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) #define IS_BPROP_TEXTURE(_Props) (((_Props)&BPROP_TEXTURE) != 0) @@ -616,6 +618,8 @@ C_ASSERT(ARRAYSIZE(g_uBasicKindProps) == AR_BASIC_MAXIMUM_COUNT); #define IS_BASIC_STREAM(_Kind) IS_BPROP_STREAM(GetBasicKindProps(_Kind)) +#define IS_BASIC_PATCH(_Kind) IS_BPROP_PATCH(GetBasicKindProps(_Kind)) + #define IS_BASIC_SAMPLER(_Kind) IS_BPROP_SAMPLER(GetBasicKindProps(_Kind)) #define IS_BASIC_TEXTURE(_Kind) IS_BPROP_TEXTURE(GetBasicKindProps(_Kind)) #define IS_BASIC_OBJECT(_Kind) IS_BPROP_OBJECT(GetBasicKindProps(_Kind)) @@ -3540,6 +3544,20 @@ class HLSLExternalSource : public ExternalSemaSource { if (kind == AR_OBJECT_LEGACY_EFFECT) effectKindIndex = i; + InheritableAttr *Attr = nullptr; + if (IS_BASIC_STREAM(kind)) + Attr = + HLSLStreamOutputAttr::CreateImplicit(*m_context, + kind - AR_OBJECT_POINTSTREAM + 1); + else if (IS_BASIC_PATCH(kind)) + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, kind == AR_OBJECT_INPUTPATCH); + else { + DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; + DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; + if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) + Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, + (unsigned)ResClass); + } DXASSERT(kind < _countof(g_ArBasicTypeNames), "g_ArBasicTypeNames has the wrong number of entries"); assert(kind < _countof(g_ArBasicTypeNames)); @@ -3609,10 +3627,10 @@ class HLSLExternalSource : public ExternalSemaSource { } } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2D", "kind"); + *m_context, "FeedbackTexture2D", "kind", Attr); } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D_ARRAY) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2DArray", "kind"); + *m_context, "FeedbackTexture2DArray", "kind", Attr); } else if (kind == AR_OBJECT_EMPTY_NODE_INPUT) { recordDecl = DeclareNodeOrRecordType( *m_context, DXIL::NodeIOKind::EmptyInput, @@ -3729,20 +3747,11 @@ class HLSLExternalSource : public ExternalSemaSource { #endif else if (templateArgCount == 0) { recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, - /*isCompleteType*/ false); + /*isCompleteType*/ false, + Attr); } else { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); - - InheritableAttr *Attr = nullptr; - DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; - DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; - if 
(GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) - Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, - (unsigned)ResClass); - else if (kind == AR_OBJECT_INPUTPATCH || kind == AR_OBJECT_OUTPUTPATCH) - Attr = HLSLTessPatchAttr::CreateImplicit(*m_context); - TypeSourceInfo *typeDefault = TemplateHasDefaultType(kind) ? float4TypeSourceInfo : nullptr; recordDecl = DeclareTemplateTypeWithHandle( @@ -4755,6 +4764,15 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::FeedbackTexture2DArray; ResClass = DXIL::ResourceClass::SRV; return true; + case AR_OBJECT_SAMPLER: + case AR_OBJECT_SAMPLERCOMPARISON: + ResKind = DXIL::ResourceKind::Sampler; + ResClass = DXIL::ResourceClass::Sampler; + return true; + case AR_OBJECT_ACCELERATION_STRUCT: + ResKind = DXIL::ResourceKind::RTAccelerationStructure; + ResClass = DXIL::ResourceClass::SRV; + return true; default: return false; } @@ -5217,7 +5235,9 @@ class HLSLExternalSource : public ExternalSemaSource { return false; } // Allow object type for Constant/TextureBuffer. - if (Template->getTemplatedDecl()->hasAttr()) { + HLSLResourceAttr *ResAttr = + Template->getTemplatedDecl()->getAttr(); + if (ResAttr && ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); @@ -5326,7 +5346,8 @@ class HLSLExternalSource : public ExternalSemaSource { "Tessellation patch should have at least one template args"); const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); - DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Tessellation patch requires type template arg 0"); QualType argType = arg.getAsType(); if (HasLongVecs(argType)) { m_sema->Diag(argLoc.getLocation(), @@ -5334,7 +5355,22 @@ class HLSLExternalSource : public ExternalSemaSource { << "tessellation patches"; return true; } + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Geometry streams should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Geometry stream requires type template arg 0"); + QualType argType = arg.getAsType(); + if (HasLongVecs(argType)) { + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "geometry streams"; + return true; + } } + bool isMatrix = Template->getCanonicalDecl() == m_matrixTemplateDecl->getCanonicalDecl(); bool isVector = Template->getCanonicalDecl() == @@ -5354,8 +5390,6 @@ class HLSLExternalSource : public ExternalSemaSource { // NOTE: IsValidTemplateArgumentType emits its own diagnostics return true; } - HLSLResourceAttr *ResAttr = - Template->getTemplatedDecl()->getAttr(); if (ResAttr && IsTyped((DXIL::ResourceKind)ResAttr->getResKind())) { // Check vectors for being too large. if (IsVectorType(m_sema, argType)) { From de6ac33353314da64d9f56154d7a428fcd2f320e Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 20 Feb 2025 19:11:31 -0700 Subject: [PATCH 05/88] Respond to feedback Add setting for max vec size. Determine long vector presence using DefinitionData bit? OR Rename testing for long vecs function? Add attribute for geometry streams, produce and test errors for long vectors there. 
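In outline, the cap is chosen once from the target profile, carried on LangOptions, and consulted wherever vector sizes are validated. A minimal sketch of that flow, using the constants added in this patch; the two helper names are illustrative only (the real logic lives in dxcompilerobj.cpp and the ranged template-argument check), so treat this as a sketch rather than the patch's code:

    #include "dxc/DXIL/DxilConstants.h"
    #include "dxc/DXIL/DxilShaderModel.h"
    #include "llvm/ADT/APSInt.h"

    // Illustrative helper: SM 6.9+ raises the cap to 1024 elements,
    // everything earlier keeps the legacy limit of 4.
    static unsigned PickMaxVectorLength(const hlsl::ShaderModel *SM) {
      return SM->IsSM69Plus() ? hlsl::DXIL::kSM69MaxVectorLength
                              : hlsl::DXIL::kDefaultMaxVectorLength;
    }

    // Illustrative helper mirroring the ranged template-argument check:
    // a vector size template argument must be 1..MaxLength inclusive.
    static bool IsValidVectorSize(const llvm::APSInt &Size, unsigned MaxLength) {
      return Size.isStrictlyPositive() && Size.getLimitedValue() <= MaxLength;
    }
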
Add and test errors for > 1024 element vectors. Add vector size to error messages good test changes --- include/dxc/DXIL/DxilConstants.h | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 8 ++- tools/clang/include/clang/Basic/LangOptions.h | 3 +- tools/clang/include/clang/Sema/SemaHLSL.h | 2 +- tools/clang/lib/Sema/SemaDXR.cpp | 5 +- tools/clang/lib/Sema/SemaHLSL.cpp | 65 ++++++++++--------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 7 +- .../clang/tools/dxcompiler/dxcompilerobj.cpp | 7 ++ 8 files changed, 59 insertions(+), 40 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index f8d5b740f7..ac894df1d6 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -147,6 +147,8 @@ const unsigned kMaxMSTotalSigRows = 32; const unsigned kMaxMSSMSize = 1024 * 28; const unsigned kMinWaveSize = 4; const unsigned kMaxWaveSize = 128; +const unsigned kDefaultMaxVectorLength = 4; +const unsigned kSM69MaxVectorLength = 1024; const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 441509d4c5..4d81b25ccc 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7519,8 +7519,8 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_4: Error< - "invalid value, valid range is between 1 and 4 inclusive">; +def err_hlsl_invalid_range_1_plus: Error< + "invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; def err_hlsl_matrix_member_empty: Error< @@ -7852,7 +7852,9 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector: Error< - "Vectors of over 4 elements in %0 are not supported">; + "Vectors of over %0 elements in %1 are not supported">; +def err_hlsl_vector_too_long: Error< + "Vectors of over %0 elements in are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/include/clang/Basic/LangOptions.h b/tools/clang/include/clang/Basic/LangOptions.h index 8dc15da5d8..433b767c8d 100644 --- a/tools/clang/include/clang/Basic/LangOptions.h +++ b/tools/clang/include/clang/Basic/LangOptions.h @@ -15,7 +15,7 @@ #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H #define LLVM_CLANG_BASIC_LANGOPTIONS_H -#include "dxc/DXIL/DxilConstants.h" // For DXIL::DefaultLinkage +#include "dxc/DXIL/DxilConstants.h" // For DXIL:: default values. #include "dxc/Support/HLSLVersion.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/LLVM.h" @@ -168,6 +168,7 @@ class LangOptions : public LangOptionsBase { hlsl::DXIL::DefaultLinkage::Default; /// Whether use row major as default matrix major. 
bool HLSLDefaultRowMajor = false; + unsigned MaxHLSLVectorLength = hlsl::DXIL::kDefaultMaxVectorLength; // HLSL Change Ends bool SPIRV = false; // SPIRV Change diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index c52131b8a5..786f82933d 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool HasLongVecs(const clang::QualType &qt); +bool ContainsVectorLongerThan(const clang::QualType &qt, unsigned length); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index cb16ced5df..07234554e2 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,9 +810,10 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (hlsl::HasLongVecs(Payload->getType())) { + if (hlsl::ContainsVectorLongerThan(Payload->getType(), + DXIL::kDefaultMaxVectorLength)) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "payload parameters"; + << DXIL::kDefaultMaxVectorLength << "payload parameters"; return; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 57eb388893..fe3390a89e 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5207,12 +5207,13 @@ class HLSLExternalSource : public ExternalSemaSource { SourceLocation Loc); bool CheckRangedTemplateArgument(SourceLocation diagLoc, - llvm::APSInt &sintValue) { - const auto *SM = - hlsl::ShaderModel::GetByName(m_sema->getLangOpts().HLSLProfile.c_str()); + llvm::APSInt &sintValue, bool IsVector) { + unsigned MaxLength = DXIL::kDefaultMaxVectorLength; + if (IsVector) + MaxLength = m_sema->getLangOpts().MaxHLSLVectorLength; if (!sintValue.isStrictlyPositive() || - (sintValue.getLimitedValue() > 4 && !SM->IsSM69Plus())) { - m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_4); + sintValue.getLimitedValue() > MaxLength) { + m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_plus) << MaxLength; return true; } @@ -5252,9 +5253,9 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (HasLongVecs(argType)) { + if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << "cbuffers"; + << DXIL::kDefaultMaxVectorLength << "cbuffers"; return true; } @@ -5349,10 +5350,10 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Tessellation patch requires type template arg 0"); QualType argType = arg.getAsType(); - if (HasLongVecs(argType)) { + if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "tessellation patches"; + << DXIL::kDefaultMaxVectorLength << "tessellation patches"; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5363,10 +5364,10 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Geometry stream requires type template arg 0"); QualType argType = arg.getAsType(); - if (HasLongVecs(argType)) { + if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { 
m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "geometry streams"; + << DXIL::kDefaultMaxVectorLength << "geometry streams"; return true; } } @@ -5419,17 +5420,16 @@ class HLSLExternalSource : public ExternalSemaSource { llvm::APSInt constantResult; if (expr != nullptr && expr->isIntegerConstantExpr(constantResult, *m_context)) { - if (CheckRangedTemplateArgument(argSrcLoc, constantResult)) { + if (CheckRangedTemplateArgument(argSrcLoc, constantResult, + isVector)) return true; - } } } } else if (arg.getKind() == TemplateArgument::ArgKind::Integral) { if (isMatrix || isVector) { llvm::APSInt Val = arg.getAsIntegral(); - if (CheckRangedTemplateArgument(argSrcLoc, Val)) { + if (CheckRangedTemplateArgument(argSrcLoc, Val, isVector)) return true; - } } } } @@ -11633,9 +11633,9 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); switch (shapeKind) { case AR_TOBJ_VECTOR: - if (GetHLSLVecSize(ArgTy) > 4) { + if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "node records"; + << DXIL::kDefaultMaxVectorLength << "node records"; Empty = false; return false; } @@ -12104,17 +12104,16 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::HasLongVecs(const QualType &qt) { - if (qt.isNull()) { +bool hlsl::ContainsVectorLongerThan(const QualType &qt, unsigned length) { + if (qt.isNull()) return false; - } if (IsHLSLVecType(qt)) { - if (GetHLSLVecSize(qt) > 4) + if (GetHLSLVecSize(qt) > length) return true; } else if (qt->isArrayType()) { const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); - return HasLongVecs(arrayType->getElementType()); + return ContainsVectorLongerThan(arrayType->getElementType(), length); } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); const RecordDecl *recordDecl = recordType->getDecl(); @@ -12124,7 +12123,7 @@ bool hlsl::HasLongVecs(const QualType &qt) { RecordDecl::field_iterator end = recordDecl->field_end(); for (; begin != end; begin++) { const FieldDecl *fieldDecl = *begin; - if (HasLongVecs(fieldDecl->getType())) + if (ContainsVectorLongerThan(fieldDecl->getType(), length)) return true; } } @@ -14760,8 +14759,10 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from cbuffers. 
- if (isGlobal && !isStatic && !isGroupShared && HasLongVecs(qt)) { - Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << "cbuffers"; + if (isGlobal && !isStatic && !isGroupShared && + ContainsVectorLongerThan(qt, DXIL::kDefaultMaxVectorLength)) { + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "cbuffers"; result = false; } @@ -15655,9 +15656,9 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (HasLongVecs(Arg->getType())) { + if (ContainsVectorLongerThan(Arg->getType(), DXIL::kDefaultMaxVectorLength)) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << "user-defined struct parameter"; + << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; return true; } return false; @@ -16396,13 +16397,15 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. for (const auto *param : FD->params()) - if (HasLongVecs(param->getType())) + if (ContainsVectorLongerThan(param->getType(), + DXIL::kDefaultMaxVectorLength)) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function parameters"; + << DXIL::kDefaultMaxVectorLength << "entry function parameters"; - if (HasLongVecs(FD->getReturnType())) + if (ContainsVectorLongerThan(FD->getReturnType(), + DXIL::kDefaultMaxVectorLength)) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function return type"; + << DXIL::kDefaultMaxVectorLength << "entry function return type"; DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index ee5ea567ce..adb2352a56 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,14 +521,17 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (HasLongVecs(param->getType())) + if (ContainsVectorLongerThan(param->getType(), + DXIL::kDefaultMaxVectorLength)) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; - if (HasLongVecs(pPatchFnDecl->getReturnType())) + if (ContainsVectorLongerThan(pPatchFnDecl->getReturnType(), 4)) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "patch constant function return type"; } diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index c1c844d4be..11effb645b 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -1440,6 +1440,13 @@ class DxcCompiler : public IDxcCompiler3, Opts.EnablePayloadQualifiers; compiler.getLangOpts().HLSLProfile = compiler.getCodeGenOpts().HLSLProfile = Opts.TargetProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile.c_str()); + if (SM->IsSM69Plus()) + compiler.getLangOpts().MaxHLSLVectorLength = DXIL::kSM69MaxVectorLength; + else + compiler.getLangOpts().MaxHLSLVectorLength = + DXIL::kDefaultMaxVectorLength; // Enable dumping implicit top level decls either if it was specifically // 
requested or if we are not dumping the ast from the command line. That From 76dde0d016eb183d36d97adb14f6058b4118921d Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 23:49:18 -0700 Subject: [PATCH 06/88] Reaname and consolidate longvecs tests Go for consistent test filename formatting. most LLVM tests have dashes, so dashes it is. Remove redundant sm68 test --- ...{longvec_decls.hlsl => longvec-decls.hlsl} | 0 ..._hs.hlsl => invalid-longvec-decls-hs.hlsl} | 0 ..._decls.hlsl => invalid-longvec-decls.hlsl} | 0 ...s_sm68.hlsl => invalid-longvecs-sm68.hlsl} | 0 .../hlsl/types/invalid_longvec_decls_68.hlsl | 108 ------------------ 5 files changed, 108 deletions(-) rename tools/clang/test/CodeGenDXIL/hlsl/types/{longvec_decls.hlsl => longvec-decls.hlsl} (100%) rename tools/clang/test/SemaHLSL/hlsl/types/{invalid_longvec_decls_hs.hlsl => invalid-longvec-decls-hs.hlsl} (100%) rename tools/clang/test/SemaHLSL/hlsl/types/{invalid_longvec_decls.hlsl => invalid-longvec-decls.hlsl} (100%) rename tools/clang/test/SemaHLSL/hlsl/types/{invalid_longvecs_sm68.hlsl => invalid-longvecs-sm68.hlsl} (100%) delete mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl similarity index 100% rename from tools/clang/test/CodeGenDXIL/hlsl/types/longvec_decls.hlsl rename to tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_hs.hlsl rename to tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls.hlsl rename to tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/types/invalid_longvecs_sm68.hlsl rename to tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl deleted file mode 100644 index 8aac527c1f..0000000000 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid_longvec_decls_68.hlsl +++ /dev/null @@ -1,108 +0,0 @@ -// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_8 -verify %s - -// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } -struct LongVec { - float4 f; - vector vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} -}; - -static vector static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} -static vector static_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - -groupshared vector gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} -groupshared vector gs_vec_arr[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - -export vector lv_param_passthru(vector vec1) { // expected-error {{invalid value, valid 
range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export void lv_param_in_out(in vector vec1, out vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vec2 = vec1; -} - -export void lv_param_inout(inout vector vec1, inout vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector tmp = vec1; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vec1 = vec2; - vec2 = tmp; -} - -export void lv_global_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - static_vec = vec; -} - -export vector lv_global_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = static_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export void lv_gs_assign(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - gs_vec = vec; -} - -export vector lv_gs_ret() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = gs_vec; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_param_arr_passthru(vector vec)[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - ret[i] = vec; - return ret; -} - -export void lv_global_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - static_vec_arr[i] = vec[i]; -} - -export vector lv_global_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - ret[i] = static_vec_arr[i]; - return ret; -} - -export void lv_gs_arr_assign(vector vec[10]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - gs_vec_arr[i] = vec[i]; -} - -export vector lv_gs_arr_ret()[10] { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret[10]; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - for (int i = 0; i < 10; i++) - ret[i] = gs_vec_arr[i]; - return ret; -} - -export vector lv_splat(TYPE scalar) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = scalar; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_initlist() { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = {1, 2, 3, 4, 5, 6}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_initlist_vec(vector vec) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = {vec, 4.0, 5.0, 6.0}; // 
expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_vec_vec(vector vec1, vector vec2) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = {vec1, vec2}; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_array_cast(TYPE arr[NUM]) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = (vector)arr; // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - -export vector lv_ctor(TYPE s) { // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); // expected-error {{invalid value, valid range is between 1 and 4 inclusive}} - return ret; -} - From 765ab1c56eb412afdba517064716b1cccea3ed42 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 23:03:09 -0700 Subject: [PATCH 07/88] Refactor, clarify, and expand testing Expand existing tests to different target and contexts. Add thorough testing for geometry streams and tessellation patches. Add toolong vector test. Verify that vectors that are over the maximum for 6.9 fail. Add subobjects and template classes to tests. These are unfortunately disabled because the code to make them work causes other tests to fail. --- .../CodeGenDXIL/hlsl/types/longvec-decls.hlsl | 305 +++++++++++------- .../hlsl/types/invalid-longvec-decls.hlsl | 164 +++++++--- .../SemaHLSL/hlsl/types/toolong-vectors.hlsl | 116 +++++++ 3 files changed, 414 insertions(+), 171 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl index d6672e7678..8bc7b9e73d 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl @@ -1,64 +1,137 @@ -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=7 %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s -// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=int16_t -DNUM=7 -enable-16bit-types %s | FileCheck %s +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s -check-prefixes=CHECK,F5 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,B7 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s -check-prefixes=CHECK,L9 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=17 %s | FileCheck %s -check-prefixes=CHECK,D17 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=256 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,H256 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=int16_t -DNUM=1024 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,S1024 // A test to verify that declarations of longvecs are permitted in all the accepted places. // Only tests for acceptance, most codegen is ignored for now. 
-// CHECK: %struct.LongVec = type { <4 x float>, <7 x [[STY:[a-z0-9]*]]> } +// CHECK: %struct.LongVec = type { <4 x float>, <[[NUM:[0-9]*]] x [[STY:[a-z0-9]*]]> } struct LongVec { float4 f; vector vec; }; +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; // Just some dummies to capture the types and mangles. // CHECK: @"\01?dummy@@3[[MNG:F|M|N|_N|_K|\$f16@]]A" = external addrspace(3) global [[STY]] groupshared TYPE dummy; -// CHECK-DAG: @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" = external addrspace(3) global <7 x [[STY]]> -// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$06@@A" = external addrspace(3) global [10 x <7 x [[STY]]>] -// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec +// Use the first groupshared to establish mangles and sizes +// F5-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:M]]$[[VS:04]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// B7-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_N]]$[[VS:06]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// L9-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_K]]$[[VS:08]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// D17-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:N]]$[[VS:0BB@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// H256-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:\$f16@]]$[[VS:0BAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// S1024-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:F]]$[[VS:0EAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> groupshared vector gs_vec; + +// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$[[VS]]@@A" = external addrspace(3) global [10 x <[[NUM]] x [[STY]]>] groupshared vector gs_vec_arr[10]; +// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec groupshared LongVec gs_vec_rec; +// CHECK-DAG: @"\01?gs_vec_sub@@3ULongVecSub@@A" = external addrspace(3) global %struct.LongVecSub +groupshared LongVecSub gs_vec_sub; +// CHECK-DAG: @"\01?gs_vec_tpl@@3U?$LongVecTpl@$[[VS]]@@A" = external addrspace(3) global %"struct.LongVecTpl<[[NUM]]>" +groupshared LongVecTpl gs_vec_tpl; -// CHECK-DAG: @static_vec = internal global <7 x [[STY]]> -// CHECK-DAG: @static_vec_arr = internal global [10 x <7 x [[STY]]>] zeroinitializer -// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec +// CHECK-DAG: @static_vec = internal global <[[NUM]] x [[STY]]> static vector static_vec; +// CHECK-DAG: @static_vec_arr = internal global [10 x <[[NUM]] x [[STY]]>] zeroinitializer static vector static_vec_arr[10]; +// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec static LongVec static_vec_rec; +// CHECK-DAG: @static_vec_sub = internal global %struct.LongVecSub +static LongVecSub static_vec_sub; +// CHECK-DAG: @static_vec_tpl = internal global %"struct.LongVecTpl<[[NUM]]>" +static LongVecTpl static_vec_tpl; // CHECK: define [[RTY:[a-z0-9]*]] @"\01?getVal@@YA[[MNG]][[MNG]]@Z"([[RTY]] {{.*}}%t) export TYPE getVal(TYPE t) {TYPE ret = dummy; dummy = t; return ret;} -// CHECK: define <7 x [[RTY]]> +// CHECK: define <[[NUM]] x [[RTY]]> // CHECK-LABEL: @"\01?lv_param_passthru -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@V1@@Z"(<7 x [[RTY]]> %vec1) -// CHECK: ret <7 x [[RTY]]> +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@V1@@Z"(<[[NUM]] x [[RTY]]> %vec1) +// CHECK: ret <[[NUM]] x [[RTY]]> export vector lv_param_passthru(vector vec1) { - vector ret = vec1; - return ret; + return vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_arr_passthru +// CHECK-SAME: 
@@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@Y09V1@@Z"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result, [10 x <[[NUM]] x [[STY]]>]* %vec) +// CHECK: ret void +export vector lv_param_arr_passthru(vector vec[10])[10] { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_rec_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_sub_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_sub_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_tpl_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_tpl_passthru(LongVec vec) { + return vec; } // CHECK-LABEL: define void @"\01?lv_param_in_out -// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@AIAV1@@Z"(<7 x [[RTY]]> %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@AIAV1@@Z"(<[[NUM]] x [[RTY]]> %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 // CHECK: ret void export void lv_param_in_out(in vector vec1, out vector vec2) { vec2 = vec1; } +// CHECK-LABEL: define void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_sub@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_sub(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_tpl@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_tpl(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + + // CHECK-LABEL: define void @"\01?lv_param_inout -// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$06@@0@Z"(<7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <7 x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec1, align 4 -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* %vec2, align 4 -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec1, align 4 -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* %vec2, align 4 +// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$[[VS]]@@0@Z"(<[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 // CHECK: ret void export void lv_param_inout(inout vector vec1, inout vector vec2) { vector tmp = vec1; @@ -66,152 +139,138 @@ export void lv_param_inout(inout vector vec1, inout vector vec2 = tmp; } -// CHECK-LABEL: define 
void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) // CHECK: memcpy // CHECK: ret void -export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { - vec2 = vec1; +export void lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; } -// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK-LABEL: define void @"\01?lv_param_inout_sub@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) // CHECK: memcpy // CHECK: ret void -export void lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { +export void lv_param_inout_sub(inout LongVec vec1, inout LongVec vec2) { LongVec tmp = vec1; vec1 = vec2; vec2 = tmp; } -// CHECK-LABEL: define void @"\01?lv_global_assign -// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]>* @static_vec +// CHECK-LABEL: define void @"\01?lv_param_inout_tpl@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy // CHECK: ret void -export void lv_global_assign(vector vec) { - static_vec = vec; +export void lv_param_inout_tpl(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; } -// CHECK: define <7 x [[RTY]]> -// CHECK-LABEL: @"\01?lv_global_ret -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]>* @static_vec -// CHECK: ret <7 x [[RTY]]> -export vector lv_global_ret() { - vector ret = static_vec; - return ret; +// CHECK-LABEL: define void @"\01?lv_global_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret void +export void lv_global_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + static_vec = vec; + static_vec_arr = arr; + static_vec_rec = rec; + static_vec_sub = sub; + static_vec_tpl = tpl; } // CHECK-LABEL: define void @"\01?lv_gs_assign -// CHECK-SAME: @@YAXV?$vector@[[MNG]]$06@@@Z"(<7 x [[RTY]]> %vec) -// CHECK: store <7 x [[STY]]> {{%.*}}, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" // CHECK: ret void -export void lv_gs_assign(vector vec) { +export void lv_gs_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { gs_vec = vec; + gs_vec_arr = arr; + gs_vec_rec = sub; + gs_vec_tpl = tpl; } -// CHECK: define <7 x [[RTY]]> -// CHECK-LABEL: @"\01?lv_gs_ret -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@XZ"() -// CHECK: load <7 x [[STY]]>, <7 x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$06@@A" -// CHECK: ret <7 x [[RTY]]> -export vector 
lv_gs_ret() { - vector ret = gs_vec; - return ret; -} - -#define DIMS 10 - -// CHECK-LABEL: define void @"\01?lv_param_arr_passthru -// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@V1@@Z"([10 x <7 x [[STY]]>]* noalias sret %agg.result, <7 x [[RTY]]> %vec) -// Arrays are returned in the params -// CHECK: ret void -export vector lv_param_arr_passthru(vector vec)[10] { - vector ret[10]; - for (int i = 0; i < DIMS; i++) - ret[i] = vec; - return ret; -} - -// CHECK-LABEL: define void @"\01?lv_global_arr_assign -// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) -// CHECK: ret void -export void lv_global_arr_assign(vector vec[10]) { - for (int i = 0; i < DIMS; i++) - static_vec_arr[i] = vec[i]; +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_global_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_global_ret() { + return static_vec; } // CHECK-LABEL: define void @"\01?lv_global_arr_ret -// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) -// Arrays are returned in the params +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) // CHECK: ret void export vector lv_global_arr_ret()[10] { - vector ret[10]; - for (int i = 0; i < DIMS; i++) - ret[i] = static_vec_arr[i]; - return ret; -} - -// CHECK-LABEL: define void @"\01?lv_gs_arr_assign -// CHECK-SAME: @@YAXY09V?$vector@[[MNG]]$06@@@Z"([10 x <7 x [[STY]]>]* %vec) -// ret void -export void lv_gs_arr_assign(vector vec[10]) { - for (int i = 0; i < DIMS; i++) - gs_vec_arr[i] = vec[i]; + return static_vec_arr; } -// CHECK-LABEL: define void @"\01?lv_gs_arr_ret -// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$06@@XZ"([10 x <7 x [[STY]]>]* noalias sret %agg.result) -export vector lv_gs_arr_ret()[10] { - vector ret[10]; - for (int i = 0; i < DIMS; i++) - ret[i] = gs_vec_arr[i]; - return ret; +// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_global_rec_ret() { + return static_vec_rec; } -// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK-LABEL: define void @"\01?lv_global_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) // CHECK: memcpy -// Aggregates are returned in the params // CHECK: ret void -export LongVec lv_param_rec_passthru(LongVec vec) { - LongVec ret = vec; - return ret; +export LongVecSub lv_global_sub_ret() { + return static_vec_sub; } -// CHECK-LABEL: define void @"\01?lv_global_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK-LABEL: define void @"\01?lv_global_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) // CHECK: memcpy // CHECK: ret void -export void lv_global_rec_assign(LongVec vec) { - static_vec_rec = vec; +export LongVecTpl lv_global_tpl_ret() { + return static_vec_tpl; } -// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_gs_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret 
<[[NUM]] x [[RTY]]> +export vector lv_gs_ret() { + return gs_vec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_gs_arr_ret()[10] { + return gs_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) // CHECK: memcpy -// Aggregates are returned in the params // CHECK: ret void -export LongVec lv_global_rec_ret() { - LongVec ret = static_vec_rec; - return ret; +export LongVec lv_gs_rec_ret() { + return gs_vec_rec; } -// CHECK-LABEL: define void @"\01?lv_gs_rec_assign@@YAXULongVec@@@Z"(%struct.LongVec* %vec) +// CHECK-LABEL: define void @"\01?lv_gs_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) // CHECK: memcpy // CHECK: ret void -export void lv_gs_rec_assign(LongVec vec) { - gs_vec_rec = vec; +export LongVecSub lv_gs_sub_ret() { + return gs_vec_sub; } -// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK-LABEL: define void @"\01?lv_gs_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) // CHECK: memcpy -// Aggregates are returned in the params // CHECK: ret void -export LongVec lv_gs_rec_ret() { - LongVec ret = gs_vec_rec; - return ret; +export LongVecTpl lv_gs_tpl_ret() { + return gs_vec_tpl; } -// CHECK: define <7 x [[RTY]]> +// CHECK: define <[[NUM]] x [[RTY]]> // CHECK-LABEL: @"\01?lv_splat -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) -// CHECK: ret <7 x [[RTY]]> +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) +// CHECK: ret <[[NUM]] x [[RTY]]> export vector lv_splat(TYPE scalar) { vector ret = scalar; return ret; @@ -244,10 +303,10 @@ export vector lv_vec_vec(vector vec1, vector vec2) { return ret; } -// CHECK: define <7 x [[RTY]]> +// CHECK: define <[[NUM]] x [[RTY]]> // CHECK-LABEL: @"\01?lv_array_cast -// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$06@@Y06[[MNG]]@Z"([7 x [[STY]]]* %arr) -// CHECK: ret <7 x [[RTY]]> +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@Y[[VS]][[MNG]]@Z"({{\[}}[[NUM]] x [[STY]]]* %arr) +// CHECK: ret <[[NUM]] x [[RTY]]> export vector lv_array_cast(TYPE arr[NUM]) { vector ret = (vector)arr; return ret; diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index ae52983772..98bcc14342 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -1,86 +1,142 @@ -// RUN: %dxc -DTYPE=float -DNUM=7 -T ps_6_9 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=LongVec -DNUM=5 -verify %s +// RUiN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s +// RUiN: %dxc -T ps_6_9 -DNUM=1024 -verify %s -struct [raypayload] LongVec { - float4 f : write(closesthit) : read(caller); - vector vec : write(closesthit) : read(caller); -}; +// Add tests for base types and instantiated template classes with longvecs +// Size of the vector shouldn't matter, but using a few different ones just in case. 
+ +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#ifndef TYPE +#define TYPE LongVecTpl +#endif -struct LongVecParm { - float f; - float4 tar2 : SV_Target2; - vector vec; +struct LongVec { + float4 f; + vector vec; }; -vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +struct LongVecSub : LongVec { + int3 is; +}; -vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +template +struct LongVecTpl { + float4 f; + vector vec; +}; -LongVec global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} cbuffer BadBuffy { - vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - LongVec cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} }; tbuffer BadTuffy { - vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - LongVec tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} }; -ConstantBuffer< LongVec > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TextureBuffer< LongVec > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + +[shader("pixel")] +vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} + vector vec : V) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + return vec; +} -vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} - vector vec : V, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - LongVecParm parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function 
parameters are not supported}} - parm.f = vec; // expected-warning {{implicit truncation of vector type}} - parm.tar2 = vec; // expected-warning {{implicit truncation of vector type}} - return vec; // expected-warning {{implicit truncation of vector type}} +[shader("vertex")] +TYPE vs_main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} + TYPE parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + parm.f = 0; + return parm; } + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line TYPE e, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + inout PointStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + inout LineStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + inout TriangleStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} + [shader("domain")] [domain("tri")] -void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} - -void PatchConstantFunction(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} - OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void patch_const(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} [shader("hull")] [domain("tri")] [outputtopology("triangle_cw")] [outputcontrolpoints(32)] -[patchconstantfunc("PatchConstantFunction")] -void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} RaytracingAccelerationStructure RTAS; +struct [raypayload] DXRLongVec { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRLongVecSub : DXRLongVec { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRLongVecTpl { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +#define RTTYPE PASTE(DXR,TYPE) + [shader("raygeneration")] void raygen() { - LongVec p = (LongVec)0; + RTTYPE p = (RTTYPE)0; RayDesc ray = (RayDesc)0; TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } + [shader("closesthit")] -void 
closesthit(inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in LongVec attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void closesthit(inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("anyhit")] -void AnyHit( inout LongVec payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in LongVec attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void AnyHit( inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} { } [shader("miss")] -void Miss(inout LongVec payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void Miss(inout RTTYPE payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} @@ -89,12 +145,12 @@ void Miss(inout LongVec payload){ // expected-error{{Vectors of over 4 elements [shader("intersection")] void Intersection() { float hitT = RayTCurrent(); - LongVec attr = (LongVec)0; + RTTYPE attr = (RTTYPE)0; bool bReported = ReportHit(hitT, 0, attr); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("callable")] -void callable1(inout LongVec p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void callable1(inout RTTYPE p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } @@ -106,27 +162,39 @@ void Amp() { DispatchMesh(1,1,1,as_pld); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} } -struct LongVecRec { +struct NodeLongVec { uint3 grid : SV_DispatchGrid; - vector vec; + vector vec; +}; + +struct NodeLongVecSub : NodeLongVec { + int3 is; }; +template +struct NodeLongVecTpl { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +#define NTYPE PASTE(Node,TYPE) + [Shader("node")] [NodeLaunch("broadcasting")] [NumThreads(8,1,1)] [NodeMaxDispatchGrid(8,1,1)] -void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are not supported}} - NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are 
not supported}} + NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} { - ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} - GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} } [Shader("node")] [NodeLaunch("coalescing")] [NumThreads(8,1,1)] -void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} [Shader("node")] [NodeLaunch("thread")] -void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl new file mode 100644 index 0000000000..c1da348695 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=float -DNUM=1025 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=float -DNUM=1025 -verify %s + +// A test to verify that declarations of longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl2 { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared LongVecTpl gs_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl<1025>' requested here}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static LongVecTpl2 static_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl2<1025>' requested here}} + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_passthru(vector vec1) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 
1 and 1024 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec1 = vec2; + vec2 = tmp; +} + +export void lv_global_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + static_vec = vec; +} + +export vector lv_global_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_gs_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + gs_vec = vec; +} + +export vector lv_gs_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +#define DIMS 10 + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_arr_passthru(vector vec)[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = vec; + return ret; +} + +export void lv_global_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + static_vec_arr[i] = vec[i]; +} + +export vector lv_global_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = static_vec_arr[i]; + return ret; +} + +export void lv_gs_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + gs_vec_arr[i] = vec[i]; +} + +export vector lv_gs_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +export LongVec lv_param_rec_passthru(LongVec vec) { + LongVec ret = vec; + return ret; +} + +export vector lv_splat(TYPE scalar) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = scalar; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export vector lv_array_cast(TYPE arr[NUM]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = (vector)arr; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + From fb6538e844a2993de10100c77769cbf0f2e862d2 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 3 Mar 2025 13:00:18 -0700 Subject: [PATCH 08/88] clang-format --- tools/clang/include/clang/AST/HlslTypes.h | 11 +++++----- tools/clang/lib/AST/ASTContextHLSL.cpp | 9 ++++---- tools/clang/lib/AST/HlslTypes.cpp | 4 +--- tools/clang/lib/Sema/SemaHLSL.cpp | 25 ++++++++++++----------- 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 9aeb97d3ee..5cd14cbe8a 
100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -348,10 +348,10 @@ void AddHLSLNodeOutputRecordTemplate( _Outptr_ clang::ClassTemplateDecl **outputRecordTemplateDecl, bool isCompleteType = true); -clang::CXXRecordDecl *DeclareRecordTypeWithHandle(clang::ASTContext &context, - llvm::StringRef name, - bool isCompleteType = true, - clang::InheritableAttr *Attr = nullptr); +clang::CXXRecordDecl * +DeclareRecordTypeWithHandle(clang::ASTContext &context, llvm::StringRef name, + bool isCompleteType = true, + clang::InheritableAttr *Attr = nullptr); void AddRaytracingConstants(clang::ASTContext &context); void AddSamplerFeedbackConstants(clang::ASTContext &context); @@ -382,8 +382,7 @@ clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, - llvm::StringRef templateParamName, - clang::InheritableAttr *Attr = nullptr); + llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index e71f37b663..5b10540e7a 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1111,7 +1111,8 @@ CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, StringRef templateParamName, InheritableAttr *Attr) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, TagTypeKind::TTK_Class); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, + TagTypeKind::TTK_Class); typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); typeDeclBuilder.startDefinition(); @@ -1140,9 +1141,9 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
- typeDeclBuilder.getRecordDecl()->addAttr( - HLSLResourceAttr::CreateImplicit(context, (unsigned)DXIL::ResourceKind::CBuffer, - (unsigned)DXIL::ResourceClass::CBuffer)); + typeDeclBuilder.getRecordDecl()->addAttr(HLSLResourceAttr::CreateImplicit( + context, (unsigned)DXIL::ResourceKind::CBuffer, + (unsigned)DXIL::ResourceClass::CBuffer)); typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 5f7e93fbee..4dd44c02d7 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -475,9 +475,7 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { return elemTy; } - -template -static AttrType *getAttr(clang::QualType type) { +template static AttrType *getAttr(clang::QualType type) { type = type.getCanonicalType(); if (const RecordType *RT = type->getAs()) { if (const auto *Spec = diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fe3390a89e..ff0624045f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -363,7 +363,7 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) -#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props)&BPROP_PATCH) != 0) #define IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) @@ -3546,11 +3546,11 @@ class HLSLExternalSource : public ExternalSemaSource { InheritableAttr *Attr = nullptr; if (IS_BASIC_STREAM(kind)) - Attr = - HLSLStreamOutputAttr::CreateImplicit(*m_context, - kind - AR_OBJECT_POINTSTREAM + 1); + Attr = HLSLStreamOutputAttr::CreateImplicit( + *m_context, kind - AR_OBJECT_POINTSTREAM + 1); else if (IS_BASIC_PATCH(kind)) - Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, kind == AR_OBJECT_INPUTPATCH); + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, + kind == AR_OBJECT_INPUTPATCH); else { DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; @@ -3746,9 +3746,9 @@ class HLSLExternalSource : public ExternalSemaSource { } #endif else if (templateArgCount == 0) { - recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, - /*isCompleteType*/ false, - Attr); + recordDecl = + DeclareRecordTypeWithHandle(*m_context, typeName, + /*isCompleteType*/ false, Attr); } else { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); @@ -5237,8 +5237,9 @@ class HLSLExternalSource : public ExternalSemaSource { } // Allow object type for Constant/TextureBuffer. 
HLSLResourceAttr *ResAttr = - Template->getTemplatedDecl()->getAttr(); - if (ResAttr && ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { + Template->getTemplatedDecl()->getAttr(); + if (ResAttr && + ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); @@ -5353,7 +5354,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "tessellation patches"; + << DXIL::kDefaultMaxVectorLength << "tessellation patches"; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5367,7 +5368,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "geometry streams"; + << DXIL::kDefaultMaxVectorLength << "geometry streams"; return true; } } From 19633b2b0ec8187cda5a4163c577a9dcec6e29d6 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 25 Feb 2025 14:43:14 -0700 Subject: [PATCH 09/88] Handle subclasses and templates of longvector structs Use RequireCompleteType to force specialization of templates encountered in global and other scopes where finding long vectors is necessary where possible. This populates the definitiondata which contains the base class chain needed to detect when a base class has disqualifying long vectors. It was also needed to detect when dependent types in a template class result in long vectors. Work graph node types didn't check their base classes for failures. This affects base classes with longvectors that have sub classes used for node objects which should fail for having long vector members. 
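(An illustrative HLSL sketch of the cases described above, added for clarity and not part of the patch itself: the record only reaches the long vector through a base class or through a template argument, so the check has to force specialization and walk base classes. The type and entry-point names below are hypothetical, and the exact diagnostic text and target profile follow the long-vector tests elsewhere in this series.)

struct LongVecBase {
  vector<float, 5> vec;               // long vector declared in the base class
};

struct LongVecSub : LongVecBase {};   // no direct long vector member; inherits one

template <int N>
struct LongVecTpl {
  vector<float, N> vec;               // only becomes a long vector once N > 4 is instantiated
};

[Shader("node")]
[NodeLaunch("thread")]
void node_sub(ThreadNodeInputRecord<LongVecSub> input) {}     // should be diagnosed like a direct long-vector member

[Shader("node")]
[NodeLaunch("thread")]
void node_tpl(ThreadNodeInputRecord<LongVecTpl<8> > input) {} // requires forcing the specialization described above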
Respond to feedback about iterating through fields in clunky manner which got left out of the last reviewer feedback response --- tools/clang/include/clang/Sema/SemaHLSL.h | 3 +- tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 78 ++++++++++--------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 5 +- .../hlsl/types/invalid-longvec-decls.hlsl | 4 +- 5 files changed, 50 insertions(+), 42 deletions(-) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 786f82933d..d31e32acbb 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,8 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool ContainsVectorLongerThan(const clang::QualType &qt, unsigned length); +bool ContainsVectorLongerThan(clang::Sema *S, clang::QualType qt, + unsigned length); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 07234554e2..32ca88c27a 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (hlsl::ContainsVectorLongerThan(Payload->getType(), + if (hlsl::ContainsVectorLongerThan(&S, Payload->getType(), DXIL::kDefaultMaxVectorLength)) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "payload parameters"; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ff0624045f..ac5ab27835 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5254,23 +5254,13 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(m_sema, argType, + DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; return true; } - if (auto *TST = dyn_cast(argType)) { - // This is a bit of a special case we need to handle. Because the - // buffer types don't use their template parameter in a way that would - // force instantiation, we need to force specialization here. 
- GetOrCreateTemplateSpecialization( - *m_context, *m_sema, - cast( - TST->getTemplateName().getAsTemplateDecl()), - llvm::ArrayRef(TST->getArgs(), - TST->getNumArgs())); - } if (const RecordType *recordType = argType->getAs()) { if (!recordType->getDecl()->isCompleteDefinition()) { m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) @@ -5351,7 +5341,8 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Tessellation patch requires type template arg 0"); QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(m_sema, argType, + DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "tessellation patches"; @@ -5365,7 +5356,8 @@ class HLSLExternalSource : public ExternalSemaSource { DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Geometry stream requires type template arg 0"); QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(argType, DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(m_sema, argType, + DXIL::kDefaultMaxVectorLength)) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "geometry streams"; @@ -11662,14 +11654,15 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, bool ErrorFound = false; const RecordDecl *RD = ArgTy->getAs()->getDecl(); // Check the fields of the RecordDecl - RecordDecl::field_iterator begin = RD->field_begin(); - RecordDecl::field_iterator end = RD->field_end(); - while (begin != end) { - const FieldDecl *FD = *begin; + for (auto *FD : RD->fields()) ErrorFound |= DiagnoseNodeStructArgument(self, ArgLoc, FD->getType(), Empty, FD); - begin++; - } + if (RD->isCompleteDefinition()) + if (auto *Child = dyn_cast(RD)) + // Walk up the inheritance chain and check base class fields + for (auto &B : Child->bases()) + ErrorFound |= + DiagnoseNodeStructArgument(self, ArgLoc, B.getType(), Empty); return ErrorFound; } default: @@ -12105,8 +12098,8 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::ContainsVectorLongerThan(const QualType &qt, unsigned length) { - if (qt.isNull()) +bool hlsl::ContainsVectorLongerThan(Sema *S, QualType qt, unsigned length) { + if (qt.isNull() || qt->isDependentType()) return false; if (IsHLSLVecType(qt)) { @@ -12114,19 +12107,30 @@ bool hlsl::ContainsVectorLongerThan(const QualType &qt, unsigned length) { return true; } else if (qt->isArrayType()) { const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); - return ContainsVectorLongerThan(arrayType->getElementType(), length); + return ContainsVectorLongerThan(S, arrayType->getElementType(), length); } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); - const RecordDecl *recordDecl = recordType->getDecl(); + RecordDecl *recordDecl = recordType->getDecl(); if (recordDecl->isInvalidDecl()) return false; - RecordDecl::field_iterator begin = recordDecl->field_begin(); - RecordDecl::field_iterator end = recordDecl->field_end(); - for (; begin != end; begin++) { - const FieldDecl *fieldDecl = *begin; - if (ContainsVectorLongerThan(fieldDecl->getType(), length)) - return true; + if (ClassTemplateSpecializationDecl *templateSpecializationDecl = + dyn_cast(recordDecl)) { + if (templateSpecializationDecl->getSpecializationKind() == + 
TSK_Undeclared) { + S->RequireCompleteType(recordDecl->getLocation(), qt, + diag::err_typecheck_decl_incomplete_type); + } } + if (!recordDecl->isCompleteDefinition()) + return false; + for (FieldDecl *FD : recordDecl->fields()) + if (ContainsVectorLongerThan(S, FD->getType(), length)) + return true; + if (auto *Child = dyn_cast(recordDecl)) + // Walk up the inheritance chain and check all fields on base classes + for (auto &B : Child->bases()) + if (ContainsVectorLongerThan(S, B.getType(), length)) + return true; } return false; } @@ -14759,9 +14763,9 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } - // Disallow long vecs from cbuffers. + // Disallow long vecs from $Global cbuffers. if (isGlobal && !isStatic && !isGroupShared && - ContainsVectorLongerThan(qt, DXIL::kDefaultMaxVectorLength)) { + ContainsVectorLongerThan(this, qt, DXIL::kDefaultMaxVectorLength)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; result = false; @@ -15657,7 +15661,8 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsVectorLongerThan(Arg->getType(), DXIL::kDefaultMaxVectorLength)) { + if (ContainsVectorLongerThan(S, Arg->getType(), + DXIL::kDefaultMaxVectorLength)) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; return true; @@ -16397,13 +16402,14 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. - for (const auto *param : FD->params()) - if (ContainsVectorLongerThan(param->getType(), + for (const auto *param : FD->params()) { + if (ContainsVectorLongerThan(&S, param->getType(), DXIL::kDefaultMaxVectorLength)) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function parameters"; + } - if (ContainsVectorLongerThan(FD->getReturnType(), + if (ContainsVectorLongerThan(&S, FD->getReturnType(), DXIL::kDefaultMaxVectorLength)) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function return type"; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index adb2352a56..6645c4c3d2 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,14 +521,15 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (ContainsVectorLongerThan(param->getType(), + if (ContainsVectorLongerThan(self, param->getType(), DXIL::kDefaultMaxVectorLength)) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; - if (ContainsVectorLongerThan(pPatchFnDecl->getReturnType(), 4)) + if (ContainsVectorLongerThan(self, pPatchFnDecl->getReturnType(), + DXIL::kDefaultMaxVectorLength)) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 98bcc14342..2d0f800121 100644 --- 
a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -1,6 +1,6 @@ // RUN: %dxc -T ps_6_9 -DTYPE=LongVec -DNUM=5 -verify %s -// RUiN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s -// RUiN: %dxc -T ps_6_9 -DNUM=1024 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s +// RUN: %dxc -T ps_6_9 -DNUM=1024 -verify %s // Add tests for base types and instantiated template classes with longvecs // Size of the vector shouldn't matter, but using a few different ones just in case. From 466bb1498f9edd8ea7c8e265185ae6c679aee288 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 5 Mar 2025 20:50:11 +0000 Subject: [PATCH 10/88] chore: autopublish 2025-03-05T20:50:11Z --- tools/clang/lib/Sema/SemaHLSL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ac5ab27835..9b1537d03a 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -363,7 +363,7 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) -#define IS_BPROP_PATCH(_Props) (((_Props)&BPROP_PATCH) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) #define IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) From 66bb77262fff4f660b065da8dd30fc3f342d4880 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 5 Mar 2025 19:11:54 -0700 Subject: [PATCH 11/88] Identify matrices and vectors by attributes I guess it was about time. Should simplify some things later as well as at present and it was too easy to not do. Specifically, I was going to need to add another string check to the template instantiation code to identify longvectors. This is cleaner. Incidentally convert another feedback texture string check to use attribs. Incidentally resort the recently-added attribs to not break up the node shader attribs. --- tools/clang/include/clang/Basic/Attr.td | 55 ++++++++------ tools/clang/lib/AST/ASTContextHLSL.cpp | 6 ++ tools/clang/lib/AST/HlslTypes.cpp | 76 ++++++++----------- .../hlsl/types/matrix/matrix-ast.hlsl | 1 + .../hlsl/types/vector/vector-ast.hlsl | 1 + 5 files changed, 74 insertions(+), 65 deletions(-) diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index e344e7b851..29430e6d4c 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -939,6 +939,39 @@ def HLSLCXXOverload : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLVector : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLMatrix : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLTessPatch : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [BoolArgument<"IsInput">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLStreamOutput : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [UnsignedArgument<"Vertices">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLResource : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + def HLSLNodeLaunch : InheritableAttr { let Spellings = [CXX11<"", "nodelaunch", 2017>]; let Args = [StringArgument<"LaunchType">]; // one of broadcasting, coalescing, thread @@ -992,28 +1025,6 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { let Documentation = [Undocumented]; } - -def HLSLTessPatch : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [BoolArgument<"IsInput">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - -def HLSLStreamOutput : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"Vertices">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - -def HLSLResource : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - def HLSLNodeObject : InheritableAttr { let Spellings = []; // No spellings! let Subjects = SubjectList<[CXXRecord]>; diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 5b10540e7a..9bacfc8b42 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -329,6 +329,9 @@ void hlsl::AddHLSLMatrixTemplate(ASTContext &context, typeDeclBuilder.addField("h", vectorArrayType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLMatrixAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to rowcount-1, and returns // a vector of colcount elements. const unsigned int templateDepth = 0; @@ -385,6 +388,9 @@ void hlsl::AddHLSLVectorTemplate(ASTContext &context, // Add an 'h' field to hold the handle. typeDeclBuilder.addField("h", vectorType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLVectorAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to colcount-1, and returns // a scalar. 
diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 4dd44c02d7..e9c443b9d7 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -53,44 +53,44 @@ ConvertHLSLVecMatTypeToExtVectorType(const clang::ASTContext &context, return nullptr; } +template static AttrType *getAttr(clang::QualType type) { + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr(); + } + return nullptr; +} + bool IsHLSLVecMatType(clang::QualType type) { - const Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } else if (templateDecl->getName() == "matrix") { - return true; - } - } + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr() || + Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr() || Decl->getAttr(); } return false; } bool IsHLSLMatType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "matrix") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } bool IsHLSLVecType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } @@ -475,20 +475,6 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { return elemTy; } -template static AttrType *getAttr(clang::QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr(); - } - return nullptr; -} - // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { type = type.getCanonicalType(); @@ -812,7 +798,11 @@ QualType GetHLSLResourceResultType(QualType type) { if (const ClassTemplateSpecializationDecl *templateDecl = dyn_cast(RD)) { - if (RD->getName().startswith("FeedbackTexture")) { + const HLSLResourceAttr *Attr = getAttr(type); + if (Attr && (Attr->getResKind() == + (unsigned)DXIL::ResourceKind::FeedbackTexture2D || + Attr->getResKind() == + (unsigned)DXIL::ResourceKind::FeedbackTexture2DArray)) { // Feedback textures are write-only and the data is opaque, // so there is no result type per se. 
return {}; diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl index 33086852ab..5443ada0c9 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl @@ -15,6 +15,7 @@ // ext_vector array. // CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class matrix definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLMatrixAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element [row_count] __attribute__((ext_vector_type(col_count)))' diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl index 0ad236a4b2..12859b7eda 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl @@ -12,6 +12,7 @@ // Verify the class, final attribute and ext_vector field decl. // CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class vector definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLVectorAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element __attribute__((ext_vector_type(element_count)))' // Verify operator overloads for const vector subscript operators. From 20c2609253a50817de2d1d1884ef414f80fdd592 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 4 Mar 2025 11:45:49 -0700 Subject: [PATCH 12/88] Use constant vector limit value for cached types Vector types can be cached in a 2D array that has a column for lengths 1-4. This uses the added constant to indicate the length and for the checks that confirm it isn't exceeded. --- tools/clang/lib/Sema/SemaHLSL.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 9b1537d03a..ff682ef501 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -2860,8 +2860,9 @@ class HLSLExternalSource : public ExternalSemaSource { TypedefDecl *m_matrixShorthandTypes[HLSLScalarTypeCount][4][4]; // Vector types already built. - QualType m_vectorTypes[HLSLScalarTypeCount][4]; - TypedefDecl *m_vectorTypedefs[HLSLScalarTypeCount][4]; + QualType m_vectorTypes[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; + TypedefDecl + *m_vectorTypedefs[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; // BuiltinType for each scalar type. 
QualType m_baseTypes[HLSLScalarTypeCount]; @@ -3840,7 +3841,7 @@ class HLSLExternalSource : public ExternalSemaSource { clang::TypedefDecl *LookupVectorShorthandType(HLSLScalarType scalarType, UINT colCount) { DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown && - colCount <= 4); + colCount <= DXIL::kDefaultMaxVectorLength); TypedefDecl *qts = m_vectorTypedefs[scalarType][colCount - 1]; if (qts == nullptr) { QualType type = LookupVectorType(scalarType, colCount); @@ -3948,7 +3949,7 @@ class HLSLExternalSource : public ExternalSemaSource { QualType LookupVectorType(HLSLScalarType scalarType, unsigned int colCount) { QualType qt; - if (colCount < 4) + if (colCount < DXIL::kDefaultMaxVectorLength) qt = m_vectorTypes[scalarType][colCount - 1]; if (qt.isNull()) { if (m_scalarTypes[scalarType].isNull()) { @@ -3957,7 +3958,7 @@ class HLSLExternalSource : public ExternalSemaSource { qt = GetOrCreateVectorSpecialization(*m_context, m_sema, m_vectorTemplateDecl, m_scalarTypes[scalarType], colCount); - if (colCount < 4) + if (colCount < DXIL::kDefaultMaxVectorLength) m_vectorTypes[scalarType][colCount - 1] = qt; } return qt; From 1b3ad427e556c9d7e8086f7dd3971d2c1e070f19 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Sun, 2 Mar 2025 22:46:38 -0700 Subject: [PATCH 13/88] Use DefinitionData bits to determine long vector presence By setting the bit when the vector template is instantiated and then propagating it when members, be they standard members or base classes, are added, the bit will be set correctly for any struct or struct-like type. For arrays, the arrays are peeled away in a utility function to get at the elements. Decided to separate the check for completeness from the check for long vectors. Even though the latter almost always requires the former, they are separate concepts and embedding the first in the second would be unexpected. --- tools/clang/include/clang/AST/DeclCXX.h | 11 +++++----- tools/clang/include/clang/Sema/SemaHLSL.h | 3 +- tools/clang/lib/AST/DeclCXX.cpp | 60 ++++++------ tools/clang/lib/Sema/SemaDXR.cpp | 3 +- tools/clang/lib/Sema/SemaHLSL.cpp | 97 ++++++++----------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 6 +- .../lib/Sema/SemaTemplateInstantiate.cpp | 12 +++ tools/clang/test/SemaHLSL/const-default.hlsl | 4 + .../clang/test/SemaHLSL/incomplete-type.hlsl | 1 + 9 files changed, 105 insertions(+), 92 deletions(-) diff --git a/tools/clang/include/clang/AST/DeclCXX.h b/tools/clang/include/clang/AST/DeclCXX.h index 3b07576545..36e0f99c82 100644 --- a/tools/clang/include/clang/AST/DeclCXX.h +++ b/tools/clang/include/clang/AST/DeclCXX.h @@ -465,6 +465,10 @@ class CXXRecordDecl : public RecordDecl { /// \brief Whether we are currently parsing base specifiers. bool IsParsingBaseSpecifiers : 1; + /// \brief Whether this class contains at least one member or base + /// class containing an HLSL vector longer than 4 elements. + bool HasHLSLLongVector : 1; + /// \brief The number of base class specifiers in Bases. unsigned NumBases; @@ -1018,6 +1022,13 @@ class CXXRecordDecl : public RecordDecl { return data().NeedOverloadResolutionForDestructor; } + // HLSL Change add HLSL Long vector bit. + /// \brief Determine whether this class contains an HLSL long vector + /// of over 4 elements. + bool hasHLSLLongVector() { return data().HasHLSLLongVector; } + /// \brief Set that this class contains an HLSL long vector of over 4 elements + bool setHasHLSLLongVector() { return data().HasHLSLLongVector = true; } + /// \brief Determine whether this class describes a lambda function object. 
bool isLambda() const { // An update record can't turn a non-lambda into a lambda. diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index d31e32acbb..7e7400d390 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,8 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool ContainsVectorLongerThan(clang::Sema *S, clang::QualType qt, - unsigned length); +bool ContainsLongVector(clang::QualType qt); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index 9ef771b932..5f8c186919 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -48,34 +48,31 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const { } CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) - : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), - Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), - Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), - HasPrivateFields(false), HasProtectedFields(false), HasPublicFields(false), - HasMutableFields(false), HasVariantMembers(false), HasOnlyCMembers(true), - HasInClassInitializer(false), HasUninitializedReferenceMember(false), - NeedOverloadResolutionForMoveConstructor(false), - NeedOverloadResolutionForMoveAssignment(false), - NeedOverloadResolutionForDestructor(false), - DefaultedMoveConstructorIsDeleted(false), - DefaultedMoveAssignmentIsDeleted(false), - DefaultedDestructorIsDeleted(false), - HasTrivialSpecialMembers(SMF_All), - DeclaredNonTrivialSpecialMembers(0), - HasIrrelevantDestructor(true), - HasConstexprNonCopyMoveConstructor(false), - DefaultedDefaultConstructorIsConstexpr(true), - HasConstexprDefaultConstructor(false), - HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), - UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), - ImplicitCopyConstructorHasConstParam(true), - ImplicitCopyAssignmentHasConstParam(true), - HasDeclaredCopyConstructorWithConstParam(false), - HasDeclaredCopyAssignmentWithConstParam(false), - IsLambda(false), IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), - Bases(), VBases(), - Definition(D), FirstFriend() { -} + : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), + Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), + Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), + HasPrivateFields(false), HasProtectedFields(false), + HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false), + HasOnlyCMembers(true), HasInClassInitializer(false), + HasUninitializedReferenceMember(false), + NeedOverloadResolutionForMoveConstructor(false), + NeedOverloadResolutionForMoveAssignment(false), + NeedOverloadResolutionForDestructor(false), + DefaultedMoveConstructorIsDeleted(false), + DefaultedMoveAssignmentIsDeleted(false), + DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All), + DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true), + HasConstexprNonCopyMoveConstructor(false), + DefaultedDefaultConstructorIsConstexpr(true), + HasConstexprDefaultConstructor(false), + HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), + UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), + 
ImplicitCopyConstructorHasConstParam(true), + ImplicitCopyAssignmentHasConstParam(true), + HasDeclaredCopyConstructorWithConstParam(false), + HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), + IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), + NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return Bases.get(Definition->getASTContext().getExternalSource()); @@ -204,6 +201,10 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; + // Propagate presence of long vector to child classes. + if (BaseClassDecl->hasHLSLLongVector()) + data().HasHLSLLongVector = true; + // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) data().HasNonLiteralTypeFieldsOrBases = true; @@ -385,6 +386,9 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } + + if (Subobj->hasHLSLLongVector()) + data().HasHLSLLongVector = true; } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 32ca88c27a..d71dc2be4c 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,8 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (hlsl::ContainsVectorLongerThan(&S, Payload->getType(), - DXIL::kDefaultMaxVectorLength)) { + if (ContainsLongVector(Payload->getType())) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "payload parameters"; return; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index ff682ef501..fc6e7004d4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -942,6 +942,11 @@ GetOrCreateVectorSpecialization(ASTContext &context, Sema *sema, "otherwise vector handle cannot be looked up"); #endif + // I don't think this is necessary. 
+ CXXRecordDecl *Decl = vectorSpecializationType->getAsCXXRecordDecl(); + if (GetHLSLVecSize(vectorSpecializationType) > DXIL::kDefaultMaxVectorLength) + Decl->setHasHLSLLongVector(); + return vectorSpecializationType; } @@ -5255,20 +5260,14 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (ContainsVectorLongerThan(m_sema, argType, - DXIL::kDefaultMaxVectorLength)) { + m_sema->RequireCompleteType(argSrcLoc, argType, + diag::err_typecheck_decl_incomplete_type); + + if (ContainsLongVector(argType)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; return true; } - - if (const RecordType *recordType = argType->getAs()) { - if (!recordType->getDecl()->isCompleteDefinition()) { - m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) - << argType; - return true; - } - } } return false; @@ -5341,9 +5340,10 @@ class HLSLExternalSource : public ExternalSemaSource { const TemplateArgument &arg = argLoc.getArgument(); DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Tessellation patch requires type template arg 0"); - QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(m_sema, argType, - DXIL::kDefaultMaxVectorLength)) { + + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "tessellation patches"; @@ -5356,9 +5356,9 @@ class HLSLExternalSource : public ExternalSemaSource { const TemplateArgument &arg = argLoc.getArgument(); DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, "Geometry stream requires type template arg 0"); - QualType argType = arg.getAsType(); - if (ContainsVectorLongerThan(m_sema, argType, - DXIL::kDefaultMaxVectorLength)) { + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "geometry streams"; @@ -12099,39 +12099,17 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::ContainsVectorLongerThan(Sema *S, QualType qt, unsigned length) { +bool hlsl::ContainsLongVector(QualType qt) { if (qt.isNull() || qt->isDependentType()) return false; - if (IsHLSLVecType(qt)) { - if (GetHLSLVecSize(qt) > length) - return true; - } else if (qt->isArrayType()) { - const ArrayType *arrayType = qt->getAsArrayTypeUnsafe(); - return ContainsVectorLongerThan(S, arrayType->getElementType(), length); - } else if (qt->isStructureOrClassType()) { - const RecordType *recordType = qt->getAs(); - RecordDecl *recordDecl = recordType->getDecl(); - if (recordDecl->isInvalidDecl()) - return false; - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast(recordDecl)) { - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) { - S->RequireCompleteType(recordDecl->getLocation(), qt, - diag::err_typecheck_decl_incomplete_type); - } - } - if (!recordDecl->isCompleteDefinition()) + while (const ArrayType *Arr = qt->getAsArrayTypeUnsafe()) + qt = Arr->getElementType(); + + if (CXXRecordDecl *Decl = qt->getAsCXXRecordDecl()) { + if (!Decl->isCompleteDefinition()) return false; - for (FieldDecl *FD : recordDecl->fields()) - if (ContainsVectorLongerThan(S, 
FD->getType(), length)) - return true; - if (auto *Child = dyn_cast(recordDecl)) - // Walk up the inheritance chain and check all fields on base classes - for (auto &B : Child->bases()) - if (ContainsVectorLongerThan(S, B.getType(), length)) - return true; + return Decl->hasHLSLLongVector(); } return false; } @@ -14765,11 +14743,21 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from $Global cbuffers. - if (isGlobal && !isStatic && !isGroupShared && - ContainsVectorLongerThan(this, qt, DXIL::kDefaultMaxVectorLength)) { - Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "cbuffers"; - result = false; + if (isGlobal && !isStatic && !isGroupShared) { + if (qt->isStructureOrClassType()) { + if (ClassTemplateSpecializationDecl *templateSpecializationDecl = + dyn_cast( + qt->getAsCXXRecordDecl())) + if (templateSpecializationDecl->getSpecializationKind() == + TSK_Undeclared) + RequireCompleteType(D.getLocStart(), qt, + diag::err_typecheck_decl_incomplete_type); + } + if (ContainsLongVector(qt)) { + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) + << DXIL::kDefaultMaxVectorLength << "cbuffers"; + result = false; + } } // SPIRV change starts @@ -15662,8 +15650,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsVectorLongerThan(S, Arg->getType(), - DXIL::kDefaultMaxVectorLength)) { + if (ContainsLongVector(Arg->getType())) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; return true; @@ -16404,14 +16391,12 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. 
for (const auto *param : FD->params()) { - if (ContainsVectorLongerThan(&S, param->getType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(param->getType())) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function parameters"; } - if (ContainsVectorLongerThan(&S, FD->getReturnType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(FD->getReturnType())) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "entry function return type"; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 6645c4c3d2..2275c48114 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,15 +521,13 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (ContainsVectorLongerThan(self, param->getType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(param->getType())) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; - if (ContainsVectorLongerThan(self, pPatchFnDecl->getReturnType(), - DXIL::kDefaultMaxVectorLength)) + if (ContainsLongVector(pPatchFnDecl->getReturnType())) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength diff --git a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index a6ae05faa5..1eacedbb0b 100644 --- a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2139,6 +2139,18 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, SourceLocation(), SourceLocation(), nullptr); CheckCompletedCXXClass(Instantiation); + // HLSL Change Begin - set longvec bit for vectors of over 4 elements + ClassTemplateSpecializationDecl *Spec = + dyn_cast(Instantiation); + if (Spec && Spec->hasAttr()) { + const TemplateArgumentList &argList = Spec->getTemplateArgs(); + const TemplateArgument &arg1 = argList[1]; + llvm::APSInt vecSize = arg1.getAsIntegral(); + if (vecSize.getLimitedValue() > hlsl::DXIL::kDefaultMaxVectorLength) + Instantiation->setHasHLSLLongVector(); + } + // HLSL Change End - set longvec bit for vectors of over 4 elements + // Default arguments are parsed, if not instantiated. We can go instantiate // default arg exprs for default constructors if necessary now. 
ActOnFinishCXXMemberDefaultArgs(Instantiation); diff --git a/tools/clang/test/SemaHLSL/const-default.hlsl b/tools/clang/test/SemaHLSL/const-default.hlsl index 2ebb6fe52e..6b5e43e0e9 100644 --- a/tools/clang/test/SemaHLSL/const-default.hlsl +++ b/tools/clang/test/SemaHLSL/const-default.hlsl @@ -33,7 +33,11 @@ class MyClass { ConstantBuffer g_const_buffer2; TextureBuffer g_texture_buffer2; +// expected-note@+2 {{forward declaration of 'FWDDeclStruct'}} +// expected-note@+1 {{forward declaration of 'FWDDeclStruct'}} struct FWDDeclStruct; +// expected-note@+2 {{forward declaration of 'FWDDeclClass'}} +// expected-note@+1 {{forward declaration of 'FWDDeclClass'}} class FWDDeclClass; // Ensure forward declared struct/class fails as expected diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index 8869b80400..250171ad05 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -3,6 +3,7 @@ // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. +// expected-note@+5 {{forward declaration of 'S'}} // expected-note@+4 {{forward declaration of 'S'}} // expected-note@+3 {{forward declaration of 'S'}} // expected-note@+2 {{forward declaration of 'S'}} From d3fec833e31f2b375279c5ca48ed7655ee685272 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 5 Mar 2025 19:17:41 -0700 Subject: [PATCH 14/88] Test for incomplete types in a number of builtin template-like objects Output Streams, Tessellation patches, and global variables should be complete when receiving other correctness checks. If they cannot be made complete, they should produce an error. This was omitted for various of these including non-template globals, which was fine, but it meant that redundant errors were produced for templates, but not standard globals likely just because that was what was tested. This removes that distinction and adds testing for all of the above to the existing incomplete-type.hlsl test. --- tools/clang/lib/Sema/SemaHLSL.cpp | 45 +++++----- ...ent_type_for_node_object_template_arg.hlsl | 3 - .../clang/test/SemaHLSL/incomplete-type.hlsl | 87 +++++++++++++++++-- 3 files changed, 101 insertions(+), 34 deletions(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fc6e7004d4..8abad632a2 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5296,22 +5296,13 @@ class HLSLExternalSource : public ExternalSemaSource { // template instantiation. if (ArgTy->isDependentType()) return false; - if (auto *recordType = ArgTy->getAs()) { - if (CXXRecordDecl *cxxRecordDecl = - dyn_cast(recordType->getDecl())) { - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast(cxxRecordDecl)) { - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) { - // Make sure specialization is done before IsTypeNumeric. - // If not, ArgTy might be treat as empty struct. - m_sema->RequireCompleteType( - ArgLoc.getLocation(), ArgTy, - diag::err_typecheck_decl_incomplete_type); - } - } - } - } + // Make sure specialization is done before IsTypeNumeric. + // If not, ArgTy might be treat as empty struct. 
+ m_sema->RequireCompleteType(ArgLoc.getLocation(), ArgTy, + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = ArgTy->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; // The node record type must be compound - error if it is not. if (GetTypeObjectKind(ArgTy) != AR_TOBJ_COMPOUND) { m_sema->Diag(ArgLoc.getLocation(), diag::err_hlsl_node_record_type) @@ -5343,6 +5334,9 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -5358,6 +5352,9 @@ class HLSLExternalSource : public ExternalSemaSource { "Geometry stream requires type template arg 0"); m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -14744,15 +14741,13 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, // Disallow long vecs from $Global cbuffers. if (isGlobal && !isStatic && !isGroupShared) { - if (qt->isStructureOrClassType()) { - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast( - qt->getAsCXXRecordDecl())) - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) - RequireCompleteType(D.getLocStart(), qt, - diag::err_typecheck_decl_incomplete_type); - } + // Suppress actual emitting of errors for incompletable types here + // They are redundant to those produced in ActOnUninitializedDecl. + struct SilentDiagnoser : public TypeDiagnoser { + SilentDiagnoser() : TypeDiagnoser(true) {} + virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} + } SD; + RequireCompleteType(D.getLocStart(), qt, SD); if (ContainsLongVector(qt)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << DXIL::kDefaultMaxVectorLength << "cbuffers"; diff --git a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl index 40e0452719..05ec268a0c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl @@ -60,12 +60,9 @@ void woo() { } template -// expected-note@+1{{zero sized record defined here}} struct ForwardDecl; // expected-note{{template is declared here}} void woot() { - // Forward decl fails because forcing completion to check empty size for node object. 
- // expected-error@+1{{record used in GroupNodeInputRecords may not have zero size}} GroupNodeInputRecords > data; // expected-error{{implicit instantiation of undefined template 'ForwardDecl'}} foo(data); } diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index 250171ad05..a2856f448e 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -1,18 +1,93 @@ -// RUN: %dxc -Tlib_6_3 -Wno-unused-value -verify %s +// RUN: %dxc -Tlib_6_8 -Wno-unused-value -verify %s // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. -// expected-note@+5 {{forward declaration of 'S'}} -// expected-note@+4 {{forward declaration of 'S'}} -// expected-note@+3 {{forward declaration of 'S'}} -// expected-note@+2 {{forward declaration of 'S'}} -// expected-note@+1 {{forward declaration of 'S'}} +// expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} +// expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} +// expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} +// expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} +// expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} +// expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} +// expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} +// expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} struct S; + +// expected-note@+2 {{template is declared here}} +// expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} +template struct T; + ConstantBuffer CB; // expected-error {{variable has incomplete type 'S'}} +ConstantBuffer > TB; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +S s; // expected-error {{variable has incomplete type 'S'}} +T<1> t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +cbuffer BadBuffy { + S cb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> cb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + +tbuffer BadTuffy { + S tb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> tb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + S func( // expected-error {{incomplete result type 'S' in function definition}} S param) // expected-error {{variable has incomplete type 'S'}} { S local; // expected-error {{variable has incomplete type 'S'}} return (S)0; // expected-error {{'S' is an incomplete type}} } + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line S e, // expected-error {{variable has incomplete type 'S'}} + inout PointStream OutputStream0) {} // expected-error {{variable has 
incomplete type 'S'}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout LineStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout TriangleStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +void patch_const(InputPatch inpatch, // expected-error{{variable has incomplete type 'S'}} + OutputPatch outpatch) {} // expected-error{{variable has incomplete type 'S'}} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +// expected-error@+1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{variable has incomplete type 'S'}} + NodeOutput output) // expected-error{{variable has incomplete type 'S'}} +{ + ThreadNodeOutputRecords touts; // expected-error{{variable has incomplete type 'S'}} + GroupNodeOutputRecords gouts; // expected-error{{variable has incomplete type 'S'}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{variable has incomplete type 'S'}} From f7f1e3dd8d8c097eeb74a86ccd348bf3f5a27b82 Mon Sep 17 00:00:00 2001 From: Chris B Date: Fri, 7 Mar 2025 17:27:47 -0600 Subject: [PATCH 15/88] [Metal] Add experimental Metal support (#6805) This adds a new `-metal` flag to DXC which can be used to generate Metal's IR directly from DXC after compilation. There are some limitations in this flag which are worth noting: 1) It does not support library shaders (yet) 2) It does not support disassembly (yet) 3) It is _wildly_ under tested because wtihout (2) we can't do anything to really verify correct output (yay?) 
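For reference, the new lit test exercises the flag end to end roughly like this (smoke.hlsl is the existing test input checked in under DXC/Inputs; the output file name is arbitrary):

    dxc smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal

The output is a Metal library, i.e. LLVM bitcode beginning with the MTLB magic number, which is what the test inspects with `head -c 4`. Pairing `-metal` with `-Fc` is rejected for now because of limitation (2) above.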
--- README.md | 10 +++ cmake/config-ix.cmake | 9 ++ cmake/modules/FindMetalIRConverter.cmake | 16 ++++ include/dxc/Support/HLSLOptions.h | 2 + include/dxc/Support/HLSLOptions.td | 5 ++ lib/DxcSupport/HLSLOptions.cpp | 17 ++++ tools/clang/test/DXC/metal.test | 7 ++ tools/clang/test/DXC/no_metal.test | 4 + .../clang/test/DXC/no_metal_disassembly.test | 7 ++ tools/clang/test/lit.cfg | 3 + tools/clang/test/lit.site.cfg.in | 1 + tools/clang/tools/dxcompiler/CMakeLists.txt | 8 ++ .../clang/tools/dxcompiler/dxcompilerobj.cpp | 89 ++++++++++++++++++- 13 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 cmake/modules/FindMetalIRConverter.cmake create mode 100644 tools/clang/test/DXC/metal.test create mode 100644 tools/clang/test/DXC/no_metal.test create mode 100644 tools/clang/test/DXC/no_metal_disassembly.test diff --git a/README.md b/README.md index 35c0132068..ddafde2115 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,16 @@ Development kits containing only the dxc.exe driver app, the dxcompiler.dll, and As an example of community contribution, this project can also target the [SPIR-V](https://www.khronos.org/registry/spir-v/) intermediate representation. Please see the [doc](docs/SPIR-V.rst) for how HLSL features are mapped to SPIR-V, and the [wiki](https://github.com/microsoft/DirectXShaderCompiler/wiki/SPIR%E2%80%90V-CodeGen) page for how to build, use, and contribute to the SPIR-V CodeGen. +### Metal CodeGen + +When built from source DXC can utilize the [Metal Shader +Converter](https://developer.apple.com/metal/shader-converter/) if it is +available during build and configuration time. This allows DXC to generate Metal +shader libraries directly using the `-metal` flag. + +Note: DXC cannot currently disassemble Metal shaders so the `-Fc` flag cannot be +used in conjunction with the `-Fo` flag. + ## Building Sources See the full documentation for [Building and testing DXC](docs/BuildingAndTestingDXC.rst) for detailed instructions. 
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 01b30568a9..4541d08162 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -568,3 +568,12 @@ else() endif() string(REPLACE " " ";" LLVM_BINDINGS_LIST "${LLVM_BINDINGS}") + +# HLSL Change Begin - Metal IR Converter +find_package(MetalIRConverter) +if (METAL_IRCONVERTER_FOUND) + set(ENABLE_METAL_CODEGEN On) + message(STATUS "Enabling Metal Support") + add_definitions(-DENABLE_METAL_CODEGEN) +endif() +# HLSL Change End - Metal IR Converter diff --git a/cmake/modules/FindMetalIRConverter.cmake b/cmake/modules/FindMetalIRConverter.cmake new file mode 100644 index 0000000000..fc7df1d6cc --- /dev/null +++ b/cmake/modules/FindMetalIRConverter.cmake @@ -0,0 +1,16 @@ +find_path(METAL_IRCONVERTER_INCLUDE_DIR metal_irconverter.h + HINTS /usr/local/include/metal_irconverter + DOC "Path to metal IR converter headers" + ) + +find_library(METAL_IRCONVERTER_LIB NAMES metalirconverter + PATH_SUFFIXES lib + ) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(METAL_IRCONVERTER + REQUIRED_VARS METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) + +message(STATUS "Metal IR Converter Include Dir: ${METAL_IRCONVERTER_INCLUDE_DIR}") +message(STATUS "Metal IR Converter Library: ${METAL_IRCONVERTER_LIB}") +mark_as_advanced(METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index 887591ae82..56e95a1659 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -274,6 +274,8 @@ class DxcOpts { SpirvOptions; // All SPIR-V CodeGen-related options #endif // SPIRV Change Ends + + bool GenMetal = false; // OPT_metal }; /// Use this class to capture, convert and handle the lifetime for the diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 130e19a525..ea000f4877 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -346,6 +346,11 @@ def disable_exception_handling : Flag<["-", "/"], "disable-exception-handling">, def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Return a module interface instead of serialized output">; +def metal : Flag<["-"], "metal">, + Group, + Flags<[CoreOption, DriverOption]>, + HelpText<"Generate Metal code">; + // SPIRV Change Starts def spirv : Flag<["-"], "spirv">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Generate SPIR-V code">; diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 3daf880f6d..1ce7d0dfc0 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -1089,6 +1089,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, addDiagnosticArgs(Args, OPT_W_Group, OPT_W_value_Group, opts.Warnings); + opts.GenMetal = Args.hasFlag(OPT_metal, OPT_INVALID, false); + // SPIRV Change Starts #ifdef ENABLE_SPIRV_CODEGEN opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false); @@ -1313,6 +1315,21 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, #endif // ENABLE_SPIRV_CODEGEN // SPIRV Change Ends +#ifndef ENABLE_METAL_CODEGEN + if (opts.GenMetal) { + errors << "Metal CodeGen not available. 
" + "Please rebuild with Metal IR Converter installed."; + return 1; + } +#endif + + if (opts.GenMetal) { + if (!opts.AssemblyCode.empty() || opts.OutputObject.empty()) { + errors << "Disassembly of Metal IR not supported (yet)."; + return 1; + } + } + // Validation for DebugInfo here because spirv uses same DebugInfo opt, // and legacy wrappers will add EmbedDebug in this case, leading to this // failing if placed before spirv path sets DebugInfo to true. diff --git a/tools/clang/test/DXC/metal.test b/tools/clang/test/DXC/metal.test new file mode 100644 index 0000000000..3d00850abc --- /dev/null +++ b/tools/clang/test/DXC/metal.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// Metal libraries are LLVM bitcode. This check inspects the magic number from +// the metal library output. +// RUN: %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal +// RUN: head -c 4 Tmp.metal | FileCheck -check-prefix=MTL %s +// MTL: {{^MTLB}} diff --git a/tools/clang/test/DXC/no_metal.test b/tools/clang/test/DXC/no_metal.test new file mode 100644 index 0000000000..37af16cad5 --- /dev/null +++ b/tools/clang/test/DXC/no_metal.test @@ -0,0 +1,4 @@ +// UNSUPPORTED: metal + +// RUN:not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK:Metal CodeGen not available diff --git a/tools/clang/test/DXC/no_metal_disassembly.test b/tools/clang/test/DXC/no_metal_disassembly.test new file mode 100644 index 0000000000..44283a8fe8 --- /dev/null +++ b/tools/clang/test/DXC/no_metal_disassembly.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// These cases both fail because the shader converter library cannot emit +// textual IR. +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal -Fc Tmp.air 2>&1 | FileCheck %s +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK: Disassembly of Metal IR not supported (yet). diff --git a/tools/clang/test/lit.cfg b/tools/clang/test/lit.cfg index 5fc5d4a27c..a3a352071c 100644 --- a/tools/clang/test/lit.cfg +++ b/tools/clang/test/lit.cfg @@ -504,6 +504,9 @@ if config.enable_backtrace == "1": if config.spirv: config.available_features.add("spirv") +if config.metal: + config.available_features.add("metal") + # Check supported dxil version def get_dxil_version(): result = subprocess.run([lit.util.which('dxc', llvm_tools_dir), "--version"], stdout=subprocess.PIPE) diff --git a/tools/clang/test/lit.site.cfg.in b/tools/clang/test/lit.site.cfg.in index 207450add5..80dcadf288 100644 --- a/tools/clang/test/lit.site.cfg.in +++ b/tools/clang/test/lit.site.cfg.in @@ -22,6 +22,7 @@ config.enable_backtrace = "@ENABLE_BACKTRACES@" config.host_arch = "@HOST_ARCH@" config.spirv = "@ENABLE_SPIRV_CODEGEN@" =="ON" config.hlsl_headers_dir = "@HLSL_HEADERS_DIR@" # HLSL change +config.metal = "@ENABLE_METAL_CODEGEN@".upper() == "ON" # HLSL change # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. 
diff --git a/tools/clang/tools/dxcompiler/CMakeLists.txt b/tools/clang/tools/dxcompiler/CMakeLists.txt index 004d2e5ad1..c69e276194 100644 --- a/tools/clang/tools/dxcompiler/CMakeLists.txt +++ b/tools/clang/tools/dxcompiler/CMakeLists.txt @@ -136,6 +136,14 @@ target_link_libraries(dxcompiler PRIVATE ${LIBRARIES}) if (ENABLE_SPIRV_CODEGEN) target_link_libraries(dxcompiler PRIVATE clangSPIRV) endif (ENABLE_SPIRV_CODEGEN) +if (ENABLE_METAL_CODEGEN) + target_link_libraries(dxcompiler PRIVATE ${METAL_IRCONVERTER_LIB}) + target_include_directories(dxcompiler PRIVATE ${METAL_IRCONVERTER_INCLUDE_DIR}) + + get_filename_component(METAL_IRCONVERTER_LIB_DIR ${METAL_IRCONVERTER_LIB} DIRECTORY CACHE) + set_property(TARGET dxcompiler APPEND_STRING + PROPERTY LINK_FLAGS " -Wl,-rpath,${METAL_IRCONVERTER_LIB_DIR}") +endif (ENABLE_METAL_CODEGEN) include_directories(AFTER ${LLVM_INCLUDE_DIR}/dxc/Tracing ${DIASDK_INCLUDE_DIRS} ${HLSL_VERSION_LOCATION}) include_directories(${LLVM_SOURCE_DIR}/tools/clang/tools/dxcvalidator) diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index c1c844d4be..a8f804bdca 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -71,6 +71,10 @@ #include "clang/Basic/Version.h" #endif // SUPPORT_QUERY_GIT_COMMIT_INFO +#ifdef ENABLE_METAL_CODEGEN +#include "metal_irconverter.h" +#endif + #define CP_UTF16 1200 using namespace llvm; @@ -817,6 +821,10 @@ class DxcCompiler : public IDxcCompiler3, } compiler.getLangOpts().IsHLSLLibrary = opts.IsLibraryProfile(); + if (compiler.getLangOpts().IsHLSLLibrary && opts.GenMetal) + return ErrorWithString("Shader libraries unsupported in Metal (yet)", + riid, ppResult); + // Clear entry function if library target if (compiler.getLangOpts().IsHLSLLibrary) compiler.getLangOpts().HLSLEntryFunction = @@ -1107,7 +1115,86 @@ class DxcCompiler : public IDxcCompiler3, &pHashBlob)); IFT(pResult->SetOutputObject(DXC_OUT_SHADER_HASH, pHashBlob)); } // SUCCEEDED(valHR) - } // compileOK && !opts.CodeGenHighLevel +#ifdef ENABLE_METAL_CODEGEN + // This is a bit hacky because we don't currently have a good way to + // disassemble AIR. 
+ if (opts.GenMetal && produceFullContainer && + !opts.OutputObject.empty()) { + IRCompiler *MetalCompiler = IRCompilerCreate(); + IRCompilerSetEntryPointName( + MetalCompiler, + compiler.getCodeGenOpts().HLSLEntryFunction.c_str()); + + IRObject *DXILObj = IRObjectCreateFromDXIL( + static_cast(pOutputBlob->GetBufferPointer()), + pOutputBlob->GetBufferSize(), IRBytecodeOwnershipNone); + + // Compile DXIL to Metal IR: + IRError *Error = nullptr; + IRObject *AIR = IRCompilerAllocCompileAndLink(MetalCompiler, NULL, + DXILObj, &Error); + + if (!AIR) { + IRObjectDestroy(DXILObj); + IRCompilerDestroy(MetalCompiler); + IRErrorDestroy(Error); + return ErrorWithString( + "Error occurred in Metal Shader Conversion", riid, ppResult); + } + + IRMetalLibBinary *MetalLib = IRMetalLibBinaryCreate(); + IRShaderStage Stage = IRShaderStageInvalid; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile); + switch (SM->GetKind()) { + case DXIL::ShaderKind::Vertex: + Stage = IRShaderStageVertex; + break; + case DXIL::ShaderKind::Pixel: + Stage = IRShaderStageFragment; + break; + case DXIL::ShaderKind::Hull: + Stage = IRShaderStageHull; + break; + case DXIL::ShaderKind::Domain: + Stage = IRShaderStageDomain; + break; + case DXIL::ShaderKind::Mesh: + Stage = IRShaderStageMesh; + break; + case DXIL::ShaderKind::Amplification: + Stage = IRShaderStageAmplification; + break; + case DXIL::ShaderKind::Geometry: + Stage = IRShaderStageGeometry; + break; + case DXIL::ShaderKind::Compute: + Stage = IRShaderStageCompute; + break; + } + assert(Stage != IRShaderStageInvalid && + "Library targets not supported for Metal (yet)."); + IRObjectGetMetalLibBinary(AIR, Stage, MetalLib); + size_t MetalLibSize = IRMetalLibGetBytecodeSize(MetalLib); + std::unique_ptr MetalLibBytes = + std::unique_ptr(new uint8_t[MetalLibSize]); + IRMetalLibGetBytecode(MetalLib, MetalLibBytes.get()); + + // Store the metallib to custom format or disk, or use to create a + // MTLLibrary. + + CComPtr MetalBlob; + IFT(hlsl::DxcCreateBlobOnHeapCopy( + MetalLibBytes.get(), (uint32_t)MetalLibSize, &MetalBlob)); + std::swap(pOutputBlob, MetalBlob); + + IRMetalLibBinaryDestroy(MetalLib); + IRObjectDestroy(DXILObj); + IRObjectDestroy(AIR); + IRCompilerDestroy(MetalCompiler); + } +#endif + } // compileOK && !opts.CodeGenHighLevel } std::string remarks; From 4d3a2f5489fd9f438f13b2308e767a93882d4728 Mon Sep 17 00:00:00 2001 From: Chris B Date: Fri, 7 Mar 2025 17:28:14 -0600 Subject: [PATCH 16/88] [NFC] Improve time tracing data (#7146) This is a bunch of small changes to improve the quality of the time traces. This mostly adds new timers breakign down dxcompilerobj and the always inliner code. 
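All of the new timers use the existing llvm::TimeTraceScope RAII helper from llvm/Support/TimeProfiler.h. A minimal sketch of the two forms this change adds follows; the function and variable names are placeholders rather than real call sites, while the labels mirror ones used in the diff:

    #include "llvm/Support/TimeProfiler.h"

    void runStep(llvm::Function *F) {
      // Fixed label with no per-call detail string.
      llvm::TimeTraceScope WholeStep("Compile Action", llvm::StringRef(""));
      // Label plus a detail string that is only computed when time tracing is active.
      llvm::TimeTraceScope PerFunction("fixupLineNumbers",
                                       [&] { return F->getName().str(); });
    }

Both scopes close when the enclosing function returns, so nested scopes show up as nested spans in the resulting trace.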
--- lib/DxilContainer/DxilContainerAssembler.cpp | 2 ++ lib/Transforms/Utils/CloneFunction.cpp | 6 +++++- lib/Transforms/Utils/InlineFunction.cpp | 16 +++++++++++----- tools/clang/lib/CodeGen/CodeGenModule.cpp | 6 ++++++ tools/clang/lib/Parse/ParseAST.cpp | 16 ++++++++++------ tools/clang/tools/dxcompiler/dxcompilerobj.cpp | 5 +++++ 6 files changed, 39 insertions(+), 12 deletions(-) diff --git a/lib/DxilContainer/DxilContainerAssembler.cpp b/lib/DxilContainer/DxilContainerAssembler.cpp index 0b7f5dd467..f0d7bf6d23 100644 --- a/lib/DxilContainer/DxilContainerAssembler.cpp +++ b/lib/DxilContainer/DxilContainerAssembler.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/Cloning.h" #include #include // Needed for DxilPipelineStateValidation.h @@ -1895,6 +1896,7 @@ void hlsl::SerializeDxilContainerForModule( DxilShaderHash *pShaderHashOut, AbstractMemoryStream *pReflectionStreamOut, AbstractMemoryStream *pRootSigStreamOut, void *pPrivateData, size_t PrivateDataSize) { + llvm::TimeTraceScope TimeScope("SerializeDxilContainer", StringRef("")); // TODO: add a flag to update the module and remove information that is not // part of DXIL proper and is used only to assemble the container. diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index f0d2dbcd7a..46294b3db8 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -29,7 +28,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include @@ -473,6 +474,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, CloningDirector *Director) { + TimeTraceScope TimeScope("CloneAndPruneIntoFromInst", [&] { + return (Twine(OldFunc->getName()) + "->" + NewFunc->getName()).str(); + }); assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index f6a255a0e4..bfa4b61fbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,10 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" @@ -24,13 +23,13 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include 
"llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -38,8 +37,10 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -291,6 +292,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { + TimeTraceScope TimeScope("CloneAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); const Function *CalledFunc = CS.getCalledFunction(); SetVector MD; @@ -401,6 +404,8 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, const DataLayout &DL, AliasAnalysis *AA) { + TimeTraceScope TimeScope("AddAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); if (!EnableNoAliasConversion) return; @@ -872,6 +877,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { + TimeTraceScope TimeScope("fixupLineNumbers", [&] { return Fn->getName(); }); DebugLoc TheCallDL = TheCall->getDebugLoc(); #if 0 // HLSL Change if (!TheCallDL) diff --git a/tools/clang/lib/CodeGen/CodeGenModule.cpp b/tools/clang/lib/CodeGen/CodeGenModule.cpp index 73ad296d47..b274ea9d64 100644 --- a/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -3376,6 +3376,12 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { + llvm::TimeTraceScope TimeScope("CGM::EmitTopLevelDecl", [&] { + if (const auto *ND = dyn_cast(D)) + return ND->getName(); + return StringRef("Unnamed decl"); + }); + // Ignore dependent declarations. if (D->getDeclContext() && D->getDeclContext()->isDependentContext()) return; diff --git a/tools/clang/lib/Parse/ParseAST.cpp b/tools/clang/lib/Parse/ParseAST.cpp index e06a4ee09e..c8009b9b53 100644 --- a/tools/clang/lib/Parse/ParseAST.cpp +++ b/tools/clang/lib/Parse/ParseAST.cpp @@ -100,8 +100,6 @@ void clang::ParseAST(Preprocessor &PP, ASTConsumer *Consumer, void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { - // HLSL Change - Support hierarchial time tracing. - llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); // Collect global stats on Decls/Stmts (until we have a module streamer). if (PrintStats) { Decl::EnableStatistics(); @@ -137,6 +135,8 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { External->StartTranslationUnit(Consumer); if (!S.getDiagnostics().hasUnrecoverableErrorOccurred()) { // HLSL Change: Skip if fatal error already occurred + // HLSL Change - Support hierarchial time tracing. 
+ llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); if (P.ParseTopLevelDecl(ADecl)) { if (!External && !S.getLangOpts().CPlusPlus) P.Diag(diag::ext_empty_translation_unit); @@ -151,10 +151,14 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { } } // HLSL Change: Skip if fatal error already occurred - // Process any TopLevelDecls generated by #pragma weak. - for (Decl *D : S.WeakTopLevelDecls()) - Consumer->HandleTopLevelDecl(DeclGroupRef(D)); - + { + // HLSL Change - Support hierarchial time tracing. + llvm::TimeTraceScope TimeScope("Frontend - Consumer", StringRef("")); + // Process any TopLevelDecls generated by #pragma weak. + for (Decl *D : S.WeakTopLevelDecls()) + Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + } + // HLSL Change Starts // Provide the opportunity to generate translation-unit level validation // errors in the front-end, without relying on code generation being diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index a8f804bdca..ab66838b66 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -722,6 +722,7 @@ class DxcCompiler : public IDxcCompiler3, bool validateRootSigContainer = false; if (isPreprocessing) { + TimeTraceScope TimeScope("PreprocessAction", StringRef("")); // These settings are back-compatible with fxc. clang::PreprocessorOutputOptions &PPOutOpts = compiler.getPreprocessorOutputOpts(); @@ -867,6 +868,7 @@ class DxcCompiler : public IDxcCompiler3, compiler.getTarget().adjust(compiler.getLangOpts()); if (opts.AstDump) { + TimeTraceScope TimeScope("DumpAST", StringRef("")); clang::ASTDumpAction dumpAction; // Consider - ASTDumpFilter, ASTDumpLookups compiler.getFrontendOpts().ASTDumpDecls = true; @@ -876,6 +878,7 @@ class DxcCompiler : public IDxcCompiler3, dumpAction.EndSourceFile(); outStream.flush(); } else if (opts.DumpDependencies) { + TimeTraceScope TimeScope("DumpDependencies", StringRef("")); auto dependencyCollector = std::make_shared(); compiler.addDependencyCollector(dependencyCollector); compiler.createPreprocessor(clang::TranslationUnitKind::TU_Complete); @@ -978,6 +981,7 @@ class DxcCompiler : public IDxcCompiler3, EmitBCAction action(&llvmContext); FrontendInputFile file(pUtf8SourceName, IK_HLSL); bool compileOK; + TimeTraceScope TimeScope("Compile Action", StringRef("")); if (action.BeginSourceFile(compiler, file)) { action.Execute(); action.EndSourceFile(); @@ -1032,6 +1036,7 @@ class DxcCompiler : public IDxcCompiler3, // Do not create a container when there is only a a high-level // representation in the module. if (compileOK && !opts.CodeGenHighLevel) { + TimeTraceScope TimeScope("AssembleAndWriteContainer", StringRef("")); HRESULT valHR = S_OK; CComPtr pRootSigStream; IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), From 50d1af5b645651b7ee4d4ef063bdc88c7d6790d4 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 04:10:08 -0600 Subject: [PATCH 17/88] Respond to feedback remove some stale elements. Add some HLSL type helper functions and add some new ones. Make resource type retreiveals type-safe. Add some parameter comments and names to make clearer what the effect of them are. Pass resource attribute to cbuffer/tbuffer creation. Clean up and clarify error messages. Remove redundant type canonization from type queries. Correct resclass of tbuffers. 
Use multimatch utility of verify to condense checks --- include/dxc/DXIL/DxilConstants.h | 5 +++ lib/DXIL/DxilUtil.cpp | 40 +++++++++---------- tools/clang/include/clang/AST/HlslTypes.h | 2 +- tools/clang/include/clang/Basic/Attr.td | 17 +++++++- .../clang/Basic/DiagnosticSemaKinds.td | 6 +-- tools/clang/lib/AST/ASTContextHLSL.cpp | 15 ++++--- tools/clang/lib/AST/HlslTypes.cpp | 22 ++++------ tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 34 +++++++--------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 2 - .../hlsl/types/invalid-longvec-decls.hlsl | 28 ++++++------- .../clang/test/SemaHLSL/incomplete-type.hlsl | 16 ++------ 12 files changed, 90 insertions(+), 99 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index ac894df1d6..b3c510a038 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -465,6 +465,11 @@ inline bool IsTBuffer(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::TBuffer; } +inline bool IsCTBuffer(DXIL::ResourceKind ResourceKind) { + return ResourceKind == DXIL::ResourceKind::CBuffer || + ResourceKind == DXIL::ResourceKind::TBuffer; +} + /// Whether the resource kind is a FeedbackTexture. inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::FeedbackTexture2D || diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 757a0bc3ee..065f19d7d0 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -427,34 +427,34 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name == "SamplerComparisonState") return RetType( - true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, false, - false, /*cmp or counter*/ true)); + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, + /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("AppendStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + /*UAV*/ false, /*ROV*/ false, /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::RTAccelerationStructure, - false, false, false)); + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("ConstantBuffer<")) return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, - false, false, false)); + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("TextureBuffer<")) return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, - false, false, false)); + /*UAV*/ false, /*ROV*/ false, false)); if (ConsumePrefix(name, "FeedbackTexture2D")) { hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid; @@ -464,7 +464,7 @@ GetHLSLResourceProperties(llvm::Type *Ty) { kind = hlsl::DXIL::ResourceKind::FeedbackTexture2D; if (name.startswith("<")) - return RetType(true, MakeResourceProperties(kind, false, false, false)); + return RetType(true, MakeResourceProperties(kind, /*UAV*/ false, /*ROV*/ false, /*Cmp*/ false)); return FalseRet; } @@ -475,63 +475,63 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name == "ByteAddressBuffer") return RetType(true, 
MakeResourceProperties(hlsl::DXIL::ResourceKind::RawBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Buffer<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TypedBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("StructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (ConsumePrefix(name, "Texture")) { if (name.startswith("1D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture1D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("1DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture1DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("2D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("3D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture3D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Cube<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TextureCube, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("CubeArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::TextureCubeArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMS<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2DMS, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMSArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DMSArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); return FalseRet; } } diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 5cd14cbe8a..9fd09b6539 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -388,7 +388,7 @@ clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( llvm::StringRef typeName, llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext &context, - bool bTBuf); + clang::InheritableAttr *Attr); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 29430e6d4c..bbc1263e20 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -960,16 +960,29 @@ def HLSLTessPatch : InheritableAttr { def HLSLStreamOutput : InheritableAttr { let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"Vertices">]; + // PrimVertices are the number of vertices that make up the streamed primitive. + // Points have 1. Lines have 2. Triangles have 3. + let Args = [UnsignedArgument<"PrimVertices">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; } def HLSLResource : InheritableAttr { let Spellings = []; // No spellings! 
- let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; + let Args = [UnsignedArgument<"ResKindUint">, UnsignedArgument<"ResClassUint">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; + + // Add enum typed getters for safety and brevity. + let AdditionalMembers = + [{ + hlsl::DXIL::ResourceKind getResKind() const { + return (hlsl::DXIL::ResourceKind)getResKindUint(); + } + hlsl::DXIL::ResourceClass getResClass() const { + return (hlsl::DXIL::ResourceClass)getResClassUint(); + } + }]; } def HLSLNodeLaunch : InheritableAttr { diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 4d81b25ccc..5f6b7effce 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7519,7 +7519,7 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_plus: Error< +def err_hlsl_invalid_range_1_to_max: Error< "invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; @@ -7852,9 +7852,7 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector: Error< - "Vectors of over %0 elements in %1 are not supported">; -def err_hlsl_vector_too_long: Error< - "Vectors of over %0 elements in are not supported">; + "Vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 9bacfc8b42..0a64772d11 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1131,25 +1131,24 @@ CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( } clang::CXXRecordDecl * -hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { +hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, InheritableAttr *Attr) { // Create ConstantBufferView template declaration in translation unit scope // like other resource. // template ConstantBuffer { int h; } DeclContext *DC = context.getTranslationUnitDecl(); + DXASSERT(Attr, "Constbuffer types require an attribute"); - BuiltinTypeDeclBuilder typeDeclBuilder( - DC, bTBuf ? "TextureBuffer" : "ConstantBuffer", - TagDecl::TagKind::TTK_Struct); + const char *TypeName = "ConstantBuffer"; + if (IsTBuffer(cast(Attr)->getResKind())) + TypeName = "TextureBuffer"; + BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, TagDecl::TagKind::TTK_Struct); (void)typeDeclBuilder.addTypeTemplateParam("T"); typeDeclBuilder.startDefinition(); CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
- - typeDeclBuilder.getRecordDecl()->addAttr(HLSLResourceAttr::CreateImplicit( - context, (unsigned)DXIL::ResourceKind::CBuffer, - (unsigned)DXIL::ResourceClass::CBuffer)); + typeDeclBuilder.getRecordDecl()->addAttr(Attr); typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index e9c443b9d7..41175e3d37 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -477,37 +477,32 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { - type = type.getCanonicalType(); if (const HLSLTessPatchAttr *Attr = getAttr(type)) return Attr->getIsInput(); return false; } bool IsHLSLOutputPatchType(QualType type) { - type = type.getCanonicalType(); if (const HLSLTessPatchAttr *Attr = getAttr(type)) return !Attr->getIsInput(); return false; } bool IsHLSLPointStreamType(QualType type) { - type = type.getCanonicalType(); if (const HLSLStreamOutputAttr *Attr = getAttr(type)) - return Attr->getVertices() == 1; + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Point; return false; } bool IsHLSLLineStreamType(QualType type) { - type = type.getCanonicalType(); if (const HLSLStreamOutputAttr *Attr = getAttr(type)) - return Attr->getVertices() == 2; + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Line; return false; } bool IsHLSLTriangleStreamType(QualType type) { - type = type.getCanonicalType(); if (const HLSLStreamOutputAttr *Attr = getAttr(type)) - return Attr->getVertices() == 3; + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Triangle; return false; } @@ -558,13 +553,13 @@ bool IsHLSLNodeType(clang::QualType type) { bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) - return Attr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer; + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) - return Attr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer; + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } @@ -592,7 +587,7 @@ bool IsHLSLNodeOutputType(clang::QualType type) { bool IsHLSLStructuredBufferType(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) - return Attr->getResKind() == (unsigned)DXIL::ResourceKind::StructuredBuffer; + return Attr->getResKind() == DXIL::ResourceKind::StructuredBuffer; return false; } @@ -799,10 +794,7 @@ QualType GetHLSLResourceResultType(QualType type) { dyn_cast(RD)) { const HLSLResourceAttr *Attr = getAttr(type); - if (Attr && (Attr->getResKind() == - (unsigned)DXIL::ResourceKind::FeedbackTexture2D || - Attr->getResKind() == - (unsigned)DXIL::ResourceKind::FeedbackTexture2DArray)) { + if (Attr && DXIL::IsFeedbackTexture(Attr->getResKind())) { // Feedback textures are write-only and the data is opaque, // so there is no result type per se. 
return {}; diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index d71dc2be4c..73ea9dd93c 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -812,7 +812,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, if (ContainsLongVector(Payload->getType())) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "payload parameters"; + << "payload parameters"; return; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 8abad632a2..a7d38dc1a6 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -942,11 +942,6 @@ GetOrCreateVectorSpecialization(ASTContext &context, Sema *sema, "otherwise vector handle cannot be looked up"); #endif - // I don't think this is necessary. - CXXRecordDecl *Decl = vectorSpecializationType->getAsCXXRecordDecl(); - if (GetHLSLVecSize(vectorSpecializationType) > DXIL::kDefaultMaxVectorLength) - Decl->setHasHLSLLongVector(); - return vectorSpecializationType; } @@ -3610,9 +3605,9 @@ class HLSLExternalSource : public ExternalSemaSource { break; } } else if (kind == AR_OBJECT_CONSTANT_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ false); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_TEXTURE_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ true); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_RAY_QUERY) { recordDecl = DeclareRayQueryType(*m_context); } else if (kind == AR_OBJECT_HEAP_RESOURCE) { @@ -4760,7 +4755,7 @@ class HLSLExternalSource : public ExternalSemaSource { return true; case AR_OBJECT_TEXTURE_BUFFER: ResKind = DXIL::ResourceKind::TBuffer; - ResClass = DXIL::ResourceClass::CBuffer; + ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_FEEDBACKTEXTURE2D: ResKind = DXIL::ResourceKind::FeedbackTexture2D; @@ -5219,7 +5214,7 @@ class HLSLExternalSource : public ExternalSemaSource { MaxLength = m_sema->getLangOpts().MaxHLSLVectorLength; if (!sintValue.isStrictlyPositive() || sintValue.getLimitedValue() > MaxLength) { - m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_plus) << MaxLength; + m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_to_max) << MaxLength; return true; } @@ -5245,7 +5240,7 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLResourceAttr *ResAttr = Template->getTemplatedDecl()->getAttr(); if (ResAttr && - ResAttr->getResClass() == (unsigned)DXIL::ResourceClass::CBuffer) { + DXIL::IsCTBuffer(ResAttr->getResKind())) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); @@ -5265,7 +5260,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsLongVector(argType)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "cbuffers"; + << "ConstantBuffers or TextureBuffers"; return true; } } @@ -5340,7 +5335,7 @@ class HLSLExternalSource : public ExternalSemaSource { if (ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "tessellation patches"; + << "tessellation patches"; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5358,7 +5353,7 @@ class HLSLExternalSource : public ExternalSemaSource { if 
(ContainsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "geometry streams"; + << "geometry streams"; return true; } } @@ -5382,7 +5377,7 @@ class HLSLExternalSource : public ExternalSemaSource { // NOTE: IsValidTemplateArgumentType emits its own diagnostics return true; } - if (ResAttr && IsTyped((DXIL::ResourceKind)ResAttr->getResKind())) { + if (ResAttr && IsTyped(ResAttr->getResKind())) { // Check vectors for being too large. if (IsVectorType(m_sema, argType)) { unsigned NumElt = hlsl::GetElementCount(argType); @@ -11626,7 +11621,7 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, case AR_TOBJ_VECTOR: if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "node records"; + << "node records"; Empty = false; return false; } @@ -14750,7 +14745,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, RequireCompleteType(D.getLocStart(), qt, SD); if (ContainsLongVector(qt)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "cbuffers"; + << "cbuffers or tbuffers"; result = false; } } @@ -15647,7 +15642,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { if (ContainsLongVector(Arg->getType())) { S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "user-defined struct parameter"; + << "user-defined struct parameter"; return true; } return false; @@ -16385,15 +16380,16 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Check general parameter characteristics // Would be nice to check for resources here as they crash the compiler now. + // See issue #7186. 
for (const auto *param : FD->params()) { if (ContainsLongVector(param->getType())) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "entry function parameters"; + << "entry function parameters"; } if (ContainsLongVector(FD->getReturnType())) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "entry function return type"; + << "entry function return type"; DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 2275c48114..a11f72b306 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -524,13 +524,11 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { if (ContainsLongVector(param->getType())) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "patch constant function parameters"; if (ContainsLongVector(pPatchFnDecl->getReturnType())) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) - << DXIL::kDefaultMaxVectorLength << "patch constant function return type"; } diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 2d0f800121..142eb59f87 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -27,27 +27,27 @@ struct LongVecTpl { vector vec; }; -vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} cbuffer BadBuffy { - vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; tbuffer BadTuffy { - vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - vector tb_vec_arr[10]; // expected-error{{Vectors 
of over 4 elements in cbuffers are not supported}} - TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} - TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} + vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; -ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} -TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in cbuffers are not supported}} +ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} [shader("pixel")] vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index a2856f448e..b0d4f1da7f 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -3,19 +3,9 @@ // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. -// expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} expected-note@+8 {{forward declaration of 'S'}} -// expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} expected-note@+7 {{forward declaration of 'S'}} -// expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} expected-note@+6 {{forward declaration of 'S'}} -// expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} expected-note@+5 {{forward declaration of 'S'}} -// expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} expected-note@+4 {{forward declaration of 'S'}} -// expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} expected-note@+3 {{forward declaration of 'S'}} -// expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} expected-note@+2 {{forward declaration of 'S'}} -// expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} expected-note@+1 {{forward declaration of 'S'}} -struct S; - -// expected-note@+2 {{template is declared here}} -// expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} expected-note@+1 {{template is declared here}} -template struct T; + +struct S; // expected-note 24 {{forward declaration of 'S'}} +template struct T; // expected-note 4 {{template is declared here}} ConstantBuffer CB; // expected-error {{variable has incomplete type 'S'}} ConstantBuffer > TB; // expected-error {{implicit instantiation of undefined template 'T<1>'}} From eedab25273063edd04740d96174dcb8d799c44d7 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: 
Mon, 10 Mar 2025 04:16:43 -0600 Subject: [PATCH 18/88] clang-format --- lib/DXIL/DxilUtil.cpp | 32 +++++++++++-------- tools/clang/include/clang/AST/HlslTypes.h | 5 +-- tools/clang/include/clang/Basic/Attr.td | 10 +++--- .../clang/Basic/DiagnosticSemaKinds.td | 8 ++--- tools/clang/lib/AST/ASTContextHLSL.cpp | 6 ++-- tools/clang/lib/Sema/SemaHLSL.cpp | 3 +- 6 files changed, 35 insertions(+), 29 deletions(-) diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 065f19d7d0..f6ffd7f7e2 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -426,19 +426,21 @@ GetHLSLResourceProperties(llvm::Type *Ty) { false, false, false)); if (name == "SamplerComparisonState") - return RetType( - true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, - /*ROV*/ false, /*cmp or counter*/ true)); + return RetType(true, MakeResourceProperties( + hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, + /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("AppendStructuredBuffer<")) - return RetType(true, MakeResourceProperties( - hlsl::DXIL::ResourceKind::StructuredBuffer, - /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); + return RetType(true, + MakeResourceProperties( + hlsl::DXIL::ResourceKind::StructuredBuffer, + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - /*UAV*/ false, /*ROV*/ false, /*cmp or counter*/ true)); + /*UAV*/ false, /*ROV*/ false, + /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") return RetType(true, @@ -447,14 +449,14 @@ GetHLSLResourceProperties(llvm::Type *Ty) { /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("ConstantBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, - /*UAV*/ false, /*ROV*/ false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("TextureBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, - /*UAV*/ false, /*ROV*/ false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (ConsumePrefix(name, "FeedbackTexture2D")) { hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid; @@ -464,7 +466,9 @@ GetHLSLResourceProperties(llvm::Type *Ty) { kind = hlsl::DXIL::ResourceKind::FeedbackTexture2D; if (name.startswith("<")) - return RetType(true, MakeResourceProperties(kind, /*UAV*/ false, /*ROV*/ false, /*Cmp*/ false)); + return RetType(true, + MakeResourceProperties(kind, /*UAV*/ false, + /*ROV*/ false, /*Cmp*/ false)); return FalseRet; } diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 9fd09b6539..e6a50de8fb 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -387,8 +387,9 @@ clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); -clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext &context, - clang::InheritableAttr *Attr); +clang::CXXRecordDecl * +DeclareConstantBufferViewType(clang::ASTContext &context, + clang::InheritableAttr *Attr); clang::CXXRecordDecl 
*DeclareRayQueryType(clang::ASTContext &context); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index bbc1263e20..9e48df51fd 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -960,8 +960,8 @@ def HLSLTessPatch : InheritableAttr { def HLSLStreamOutput : InheritableAttr { let Spellings = []; // No spellings! - // PrimVertices are the number of vertices that make up the streamed primitive. - // Points have 1. Lines have 2. Triangles have 3. + // PrimVertices are the number of vertices that make up the streamed + // primitive. Points have 1. Lines have 2. Triangles have 3. let Args = [UnsignedArgument<"PrimVertices">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; @@ -969,13 +969,13 @@ def HLSLStreamOutput : InheritableAttr { def HLSLResource : InheritableAttr { let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"ResKindUint">, UnsignedArgument<"ResClassUint">]; + let Args = [UnsignedArgument<"ResKindUint">, + UnsignedArgument<"ResClassUint">]; let Subjects = SubjectList<[CXXRecord]>; let Documentation = [Undocumented]; // Add enum typed getters for safety and brevity. - let AdditionalMembers = - [{ + let AdditionalMembers = [{ hlsl::DXIL::ResourceKind getResKind() const { return (hlsl::DXIL::ResourceKind)getResKindUint(); } diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5f6b7effce..64eebfeb0e 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7519,8 +7519,8 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_to_max: Error< - "invalid value, valid range is between 1 and %0 inclusive">; +def err_hlsl_invalid_range_1_to_max + : Error<"invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; def err_hlsl_matrix_member_empty: Error< @@ -7851,8 +7851,8 @@ def err_hlsl_load_from_mesh_out_arrays: Error< "output arrays of a mesh shader can not be read from">; def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; -def err_hlsl_unsupported_long_vector: Error< - "Vectors of over 4 elements in %0 are not supported">; +def err_hlsl_unsupported_long_vector + : Error<"Vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 0a64772d11..870d032d39 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1131,7 +1131,8 @@ CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( } clang::CXXRecordDecl * -hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, InheritableAttr *Attr) { +hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, + InheritableAttr *Attr) { // Create ConstantBufferView template declaration in 
translation unit scope // like other resource. // template ConstantBuffer { int h; } @@ -1141,7 +1142,8 @@ hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, InheritableAttr const char *TypeName = "ConstantBuffer"; if (IsTBuffer(cast(Attr)->getResKind())) TypeName = "TextureBuffer"; - BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, TagDecl::TagKind::TTK_Struct); + BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, + TagDecl::TagKind::TTK_Struct); (void)typeDeclBuilder.addTypeTemplateParam("T"); typeDeclBuilder.startDefinition(); CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index a7d38dc1a6..2de7004532 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5239,8 +5239,7 @@ class HLSLExternalSource : public ExternalSemaSource { // Allow object type for Constant/TextureBuffer. HLSLResourceAttr *ResAttr = Template->getTemplatedDecl()->getAttr(); - if (ResAttr && - DXIL::IsCTBuffer(ResAttr->getResKind())) { + if (ResAttr && DXIL::IsCTBuffer(ResAttr->getResKind())) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); From e9cf3d2b9693ca997b579c1fc1b5ab5af3df7c29 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 04:49:24 -0600 Subject: [PATCH 19/88] Respond to feedback from a different PR --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- .../hlsl/types/invalid-longvec-decls-hs.hlsl | 6 +- .../hlsl/types/invalid-longvec-decls.hlsl | 98 +++++++++---------- 3 files changed, 53 insertions(+), 53 deletions(-) diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 64eebfeb0e..9be040b8a0 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7852,7 +7852,7 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector - : Error<"Vectors of over 4 elements in %0 are not supported">; + : Error<"vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl index 185233ad0f..1625454360 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl @@ -10,9 +10,9 @@ struct LongVec { vector vec; }; -HsConstantData PatchConstantFunction( // expected-error{{Vectors of over 4 elements in patch constant function return type are not supported}} - vector vec : V, // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} - LongVec lv : L) { // expected-error{{Vectors of over 4 elements in patch constant function parameters are not supported}} +HsConstantData PatchConstantFunction( // expected-error{{vectors of over 4 elements in patch constant function return type are not supported}} + vector vec : V, // expected-error{{vectors of over 4 elements in patch constant function parameters are not 
supported}} + LongVec lv : L) { // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} return (HsConstantData)0; } diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 142eb59f87..0604feeaec 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -27,37 +27,37 @@ struct LongVecTpl { vector vec; }; -vector global_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} -vector global_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} -TYPE global_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} -TYPE global_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} cbuffer BadBuffy { - vector cb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - vector cb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE cb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE cb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; tbuffer BadTuffy { - vector tb_vec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - vector tb_vec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE tb_vec_rec; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} - TYPE tb_vec_rec_arr[10]; // expected-error{{Vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} }; -ConstantBuffer< TYPE > const_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} -TextureBuffer< TYPE > tex_buf; // expected-error{{Vectors of over 4 elements in ConstantBuffers or TextureBuffers are not 
supported}} +ConstantBuffer< TYPE > const_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} [shader("pixel")] -vector main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} - vector vec : V) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +vector main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + vector vec : V) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} return vec; } [shader("vertex")] -TYPE vs_main( // expected-error{{Vectors of over 4 elements in entry function return type are not supported}} - TYPE parm : P) : SV_Target { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +TYPE vs_main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + TYPE parm : P) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} parm.f = 0; return parm; } @@ -65,33 +65,33 @@ TYPE vs_main( // expected-error{{Vectors of over 4 elements in entry function re [shader("geometry")] [maxvertexcount(3)] -void gs_point(line TYPE e, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - inout PointStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} +void gs_point(line TYPE e, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout PointStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} [shader("geometry")] [maxvertexcount(12)] -void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - inout LineStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout LineStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} [shader("geometry")] [maxvertexcount(12)] -void gs_line(line TYPE a, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - inout TriangleStream OutputStream0) {} // expected-error{{Vectors of over 4 elements in geometry streams are not supported}} +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout TriangleStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} [shader("domain")] [domain("tri")] -void ds_main(OutputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} -void patch_const(InputPatch inpatch, // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} - OutputPatch outpatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not 
supported}} +void patch_const(InputPatch inpatch, // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} [shader("hull")] [domain("tri")] [outputtopology("triangle_cw")] [outputcontrolpoints(32)] [patchconstantfunc("patch_const")] -void hs_main(InputPatch TrianglePatch) {} // expected-error{{Vectors of over 4 elements in tessellation patches are not supported}} +void hs_main(InputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} RaytracingAccelerationStructure RTAS; @@ -116,42 +116,42 @@ struct [raypayload] DXRLongVecTpl { void raygen() { RTTYPE p = (RTTYPE)0; RayDesc ray = (RayDesc)0; - TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} - CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("closesthit")] -void closesthit(inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in RTTYPE attribs ) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void closesthit(inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; - TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} - CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("anyhit")] -void AnyHit( inout RTTYPE payload, // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - in RTTYPE attribs ) // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void AnyHit( inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} { } [shader("miss")] -void Miss(inout RTTYPE payload){ // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} +void Miss(inout RTTYPE payload){ // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} RayDesc ray; - TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} - CallShader(0, payload); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are 
not supported}} + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("intersection")] void Intersection() { float hitT = RayTCurrent(); RTTYPE attr = (RTTYPE)0; - bool bReported = ReportHit(hitT, 0, attr); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } [shader("callable")] -void callable1(inout RTTYPE p) { // expected-error{{Vectors of over 4 elements in entry function parameters are not supported}} - CallShader(0, p); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} +void callable1(inout RTTYPE p) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } groupshared LongVec as_pld; @@ -159,7 +159,7 @@ groupshared LongVec as_pld; [shader("amplification")] [numthreads(1,1,1)] void Amp() { - DispatchMesh(1,1,1,as_pld); // expected-error{{Vectors of over 4 elements in user-defined struct parameter are not supported}} + DispatchMesh(1,1,1,as_pld); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} } struct NodeLongVec { @@ -183,18 +183,18 @@ struct NodeLongVecTpl { [NodeLaunch("broadcasting")] [NumThreads(8,1,1)] [NodeMaxDispatchGrid(8,1,1)] -void broadcast(DispatchNodeInputRecord input, // expected-error{{Vectors of over 4 elements in node records are not supported}} - NodeOutput output) // expected-error{{Vectors of over 4 elements in node records are not supported}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{vectors of over 4 elements in node records are not supported}} + NodeOutput output) // expected-error{{vectors of over 4 elements in node records are not supported}} { - ThreadNodeOutputRecords touts; // expected-error{{Vectors of over 4 elements in node records are not supported}} - GroupNodeOutputRecords gouts; // expected-error{{Vectors of over 4 elements in node records are not supported}} + ThreadNodeOutputRecords touts; // expected-error{{vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{vectors of over 4 elements in node records are not supported}} } [Shader("node")] [NodeLaunch("coalescing")] [NumThreads(8,1,1)] -void coalesce(GroupNodeInputRecords input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void coalesce(GroupNodeInputRecords input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} [Shader("node")] [NodeLaunch("thread")] -void threader(ThreadNodeInputRecord input) {} // expected-error{{Vectors of over 4 elements in node records are not supported}} +void threader(ThreadNodeInputRecord input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} From cc0ddc23b5a8b6bc16329871b3c91900676090dd Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 05:04:29 -0600 Subject: [PATCH 20/88] Rename long vector check func again --- tools/clang/include/clang/Sema/SemaHLSL.h | 2 +- 
tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 16 ++++++++-------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 7e7400d390..d6103b55e6 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool ContainsLongVector(clang::QualType qt); +bool containsLongVector(clang::QualType qt); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 73ea9dd93c..c3dfdb7c9f 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (ContainsLongVector(Payload->getType())) { + if (containsLongVector(Payload->getType())) { S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << "payload parameters"; return; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 2de7004532..dffa680a35 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5257,7 +5257,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (ContainsLongVector(argType)) { + if (containsLongVector(argType)) { m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << "ConstantBuffers or TextureBuffers"; return true; @@ -5331,7 +5331,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { + if (containsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << "tessellation patches"; @@ -5349,7 +5349,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { + if (containsLongVector(arg.getAsType())) { m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) << "geometry streams"; @@ -12090,7 +12090,7 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::ContainsLongVector(QualType qt) { +bool hlsl::containsLongVector(QualType qt) { if (qt.isNull() || qt->isDependentType()) return false; @@ -14742,7 +14742,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (ContainsLongVector(qt)) { + if (containsLongVector(qt)) { Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << "cbuffers or tbuffers"; result = false; @@ -15639,7 +15639,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsLongVector(Arg->getType())) { + if (containsLongVector(Arg->getType())) { S->Diag(Arg->getExprLoc(), 
diag::err_hlsl_unsupported_long_vector) << "user-defined struct parameter"; return true; @@ -16381,12 +16381,12 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. for (const auto *param : FD->params()) { - if (ContainsLongVector(param->getType())) + if (containsLongVector(param->getType())) S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << "entry function parameters"; } - if (ContainsLongVector(FD->getReturnType())) + if (containsLongVector(FD->getReturnType())) S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << "entry function return type"; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index a11f72b306..11bb4c4f2f 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,12 +521,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (ContainsLongVector(param->getType())) + if (containsLongVector(param->getType())) self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << "patch constant function parameters"; - if (ContainsLongVector(pPatchFnDecl->getReturnType())) + if (containsLongVector(pPatchFnDecl->getReturnType())) self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) << "patch constant function return type"; From 66e7d23937c1608576942e73ad4bf97eeb2185fb Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 13:08:58 -0600 Subject: [PATCH 21/88] Respond to feedback Correct UAVness of consume/append buffers. Add HLSL notes for changes to DeclCXX Share more code in IsHLSLVecMatType --- lib/DXIL/DxilUtil.cpp | 2 +- tools/clang/lib/AST/DeclCXX.cpp | 7 ++++++- tools/clang/lib/AST/HlslTypes.cpp | 13 +------------ tools/clang/lib/Sema/SemaHLSL.cpp | 6 ++---- 4 files changed, 10 insertions(+), 18 deletions(-) diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index f6ffd7f7e2..865fad487c 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -439,7 +439,7 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - /*UAV*/ false, /*ROV*/ false, + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index 5f8c186919..baed44667f 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -48,6 +48,7 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const { } CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) + // HLSL Change Begin - Add HasLongVector and clang-format : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), @@ -73,6 +74,7 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} +// HLSL Change End - Add HasLongVector and clang-format CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return 
Bases.get(Definition->getASTContext().getExternalSource()); @@ -201,9 +203,10 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; - // Propagate presence of long vector to child classes. + // HLSL Change Begin - Propagate presence of long vector to child classes. if (BaseClassDecl->hasHLSLLongVector()) data().HasHLSLLongVector = true; + // HLSL Change End // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) @@ -387,8 +390,10 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForDestructor = true; } + // HLSL Change Begin - Propagate presence of long vector to child classes. if (Subobj->hasHLSLLongVector()) data().HasHLSLLongVector = true; + // HLSL Change End } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 41175e3d37..630e969881 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -68,18 +68,7 @@ template static AttrType *getAttr(clang::QualType type) { } bool IsHLSLVecMatType(clang::QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr() || - Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr() || Decl->getAttr(); - } - return false; + return getAttr(type) || getAttr(type); } bool IsHLSLMatType(clang::QualType type) { diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index dffa680a35..858b964cdf 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -4736,16 +4736,14 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::RawBuffer; ResClass = DXIL::ResourceClass::UAV; return true; - case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: - case AR_OBJECT_APPEND_STRUCTURED_BUFFER: - // It may seem incorrect to make these SRV, - // but it is consistent with GetHLSLResourceProperties(). 
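// Illustrative summary, not part of the patch: after this hunk the
// append/consume cases share the UAV group below, so the Sema-side
// classification agrees with the DxilUtil.cpp hunk earlier in this commit
// that passes /*UAV*/ true for ConsumeStructuredBuffer. In effect:
//   AppendStructuredBuffer   -> ResourceKind::StructuredBuffer, ResourceClass::UAV
//   ConsumeStructuredBuffer  -> ResourceKind::StructuredBuffer, ResourceClass::UAV
//   (plain) StructuredBuffer -> ResourceKind::StructuredBuffer, ResourceClass::SRV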
case AR_OBJECT_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_RWSTRUCTURED_BUFFER: case AR_OBJECT_ROVSTRUCTURED_BUFFER: + case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: + case AR_OBJECT_APPEND_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::UAV; return true; From 0102b3c4592682deacbae140af462f512ce325d1 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 14:32:02 -0600 Subject: [PATCH 22/88] Use select indices instead of strings as parameters to longvec error --- .../clang/Basic/DiagnosticSemaKinds.td | 8 ++++- tools/clang/lib/Sema/SemaDXR.cpp | 3 +- tools/clang/lib/Sema/SemaHLSL.cpp | 30 ++++++++++++------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 12 +++++--- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9be040b8a0..de59f01c5d 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7852,7 +7852,13 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector - : Error<"vectors of over 4 elements in %0 are not supported">; + : Error<"vectors of over 4 elements in %select{ + ConstantBuffers or TextureBuffers| + tessellation patches|geometry streams|node records| + cbuffers or tbuffers|user-defined struct parameter| + entry function parameters|entry function return type| + patch constant function parameters|patch constant function return type| + payload parameters}0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index c3dfdb7c9f..0f27de8291 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -811,8 +811,9 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, } if (containsLongVector(Payload->getType())) { + const unsigned PayloadParametersIdx = 10; S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "payload parameters"; + << PayloadParametersIdx; return; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 858b964cdf..2d1873fd55 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5256,8 +5256,9 @@ class HLSLExternalSource : public ExternalSemaSource { diag::err_typecheck_decl_incomplete_type); if (containsLongVector(argType)) { + const unsigned ConstantBuffersOrTextureBuffersIdx = 0; m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << "ConstantBuffers or TextureBuffers"; + << ConstantBuffersOrTextureBuffersIdx; return true; } } @@ -5330,9 +5331,10 @@ class HLSLExternalSource : public ExternalSemaSource { if (Decl && !Decl->isCompleteDefinition()) return true; if (containsLongVector(arg.getAsType())) { + const unsigned TessellationPatchesIDx = 1; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "tessellation patches"; + << TessellationPatchesIDx; return true; } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5348,9 +5350,10 @@ class HLSLExternalSource : 
public ExternalSemaSource { if (Decl && !Decl->isCompleteDefinition()) return true; if (containsLongVector(arg.getAsType())) { + const unsigned GeometryStreamsIdx = 2; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "geometry streams"; + << GeometryStreamsIdx; return true; } } @@ -11617,8 +11620,9 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, switch (shapeKind) { case AR_TOBJ_VECTOR: if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { + const unsigned NodeRecordsIdx = 3; self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << "node records"; + << NodeRecordsIdx; Empty = false; return false; } @@ -14741,8 +14745,9 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } SD; RequireCompleteType(D.getLocStart(), qt, SD); if (containsLongVector(qt)) { + unsigned CbuffersOrTbuffersIdx = 4; Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << "cbuffers or tbuffers"; + << CbuffersOrTbuffersIdx; result = false; } } @@ -15638,8 +15643,9 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { if (containsLongVector(Arg->getType())) { + const unsigned UserDefinedStructParameterIdx = 5; S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << "user-defined struct parameter"; + << UserDefinedStructParameterIdx; return true; } return false; @@ -16379,14 +16385,18 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. for (const auto *param : FD->params()) { - if (containsLongVector(param->getType())) + if (containsLongVector(param->getType())) { + const unsigned EntryFunctionParametersIdx = 6; S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function parameters"; + << EntryFunctionParametersIdx; + } } - if (containsLongVector(FD->getReturnType())) + if (containsLongVector(FD->getReturnType())) { + const unsigned EntryFunctionReturnIdx = 7; S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "entry function return type"; + << EntryFunctionReturnIdx; + } DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 11bb4c4f2f..feefd4f625 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -521,15 +521,19 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (containsLongVector(param->getType())) + if (containsLongVector(param->getType())) { + const unsigned PatchConstantFunctionParametersIdx = 8; self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "patch constant function parameters"; + << PatchConstantFunctionParametersIdx; + } - if (containsLongVector(pPatchFnDecl->getReturnType())) + if (containsLongVector(pPatchFnDecl->getReturnType())) { + const unsigned PatchConstantFunctionReturnIdx = 9; self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) - << "patch constant function return type"; + << PatchConstantFunctionReturnIdx; + } } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); From 88479cfeb11657031606f6d8a3bcb1076d5f6746 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 10 Mar 2025 
15:14:08 -0600 Subject: [PATCH 23/88] fix formatting induced build break --- .../include/clang/Basic/DiagnosticSemaKinds.td | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index de59f01c5d..8d428073bd 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7852,13 +7852,13 @@ def err_hlsl_load_from_mesh_out_arrays: Error< def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; def err_hlsl_unsupported_long_vector - : Error<"vectors of over 4 elements in %select{ - ConstantBuffers or TextureBuffers| - tessellation patches|geometry streams|node records| - cbuffers or tbuffers|user-defined struct parameter| - entry function parameters|entry function return type| - patch constant function parameters|patch constant function return type| - payload parameters}0 are not supported">; + : Error<"vectors of over 4 elements in " + "%select{ConstantBuffers or TextureBuffers|" + "tessellation patches|geometry streams|node records|" + "cbuffers or tbuffers|user-defined struct parameter|" + "entry function parameters|entry function return type|" + "patch constant function parameters|patch constant function return type|" + "payload parameters}0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< From a2979e7d014a4a7a55a9d1d1ebc6e9697dce4ed9 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 11 Mar 2025 10:51:22 -0700 Subject: [PATCH 24/88] Resolve some default error warnings (#7191) DxilContainerValidation.cpp has some int / bool comparisons that cause default error warnings in some of the private builds. This needs to be addressed. This PR changes the comparisons by converting the numerical expressions into the appropriate boolean, then comparing the booleans. --- lib/DxilValidation/DxilContainerValidation.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/DxilValidation/DxilContainerValidation.cpp b/lib/DxilValidation/DxilContainerValidation.cpp index 890e90e354..c21e588cf5 100644 --- a/lib/DxilValidation/DxilContainerValidation.cpp +++ b/lib/DxilValidation/DxilContainerValidation.cpp @@ -337,9 +337,12 @@ void PSVContentVerifier::VerifySignatureElement( PSVSignatureElement PSVSE(StrTab, IndexTab, PSVSE0); if (SE.IsArbitrary()) - Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()); + Mismatch |= + strcmp(PSVSE.GetSemanticName(), SE.GetName()) == 0 ? false : true; else - Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()); + Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()) == 0 + ? false + : true; ModulePSVSE0.SemanticName = PSVSE0->SemanticName; // Compare all fields. @@ -494,7 +497,8 @@ void PSVContentVerifier::Verify(unsigned ValMajor, unsigned ValMinor, std::to_string(ShaderStage)); return; } - if (PSV1->UsesViewID != DM.m_ShaderFlags.GetViewID()) + bool ViewIDUsed = PSV1->UsesViewID == 0 ? 
false : true; + if (ViewIDUsed != DM.m_ShaderFlags.GetViewID()) EmitMismatchError("UsesViewID", std::to_string(PSV1->UsesViewID), std::to_string(DM.m_ShaderFlags.GetViewID())); From 3d6917137ec429d3050f6a6256afdd3d9e1b3e20 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 12 Mar 2025 16:13:11 -0700 Subject: [PATCH 25/88] Shorten bool conversion, remove unneeded change (#7197) Polishing up changes made to improve dxc buildability in different environments. --- lib/DxilValidation/DxilContainerValidation.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/DxilValidation/DxilContainerValidation.cpp b/lib/DxilValidation/DxilContainerValidation.cpp index c21e588cf5..89e23767fe 100644 --- a/lib/DxilValidation/DxilContainerValidation.cpp +++ b/lib/DxilValidation/DxilContainerValidation.cpp @@ -337,12 +337,9 @@ void PSVContentVerifier::VerifySignatureElement( PSVSignatureElement PSVSE(StrTab, IndexTab, PSVSE0); if (SE.IsArbitrary()) - Mismatch |= - strcmp(PSVSE.GetSemanticName(), SE.GetName()) == 0 ? false : true; + Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()) != 0; else - Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()) == 0 - ? false - : true; + Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()); ModulePSVSE0.SemanticName = PSVSE0->SemanticName; // Compare all fields. @@ -497,7 +494,7 @@ void PSVContentVerifier::Verify(unsigned ValMajor, unsigned ValMinor, std::to_string(ShaderStage)); return; } - bool ViewIDUsed = PSV1->UsesViewID == 0 ? false : true; + bool ViewIDUsed = PSV1->UsesViewID != 0; if (ViewIDUsed != DM.m_ShaderFlags.GetViewID()) EmitMismatchError("UsesViewID", std::to_string(PSV1->UsesViewID), std::to_string(DM.m_ShaderFlags.GetViewID())); From ec5324d66215cd748c162e0e5efed9a85b402ff9 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 13 Mar 2025 12:29:13 -0700 Subject: [PATCH 26/88] NFC: Update HLSL_INTRINSIC struct for Flags and MinShaderModel fields (#7199) HLSL_INTRINSIC will need to be updated for SM 6.9, specifically: - to add a new flag - to encode minimum shader model version for an availability attribute Changing this structure is a breaking change to the internal intrinsic table protocol, which is used for the extension mechanism. This change separates out the breaking change with no functional changes for simpler review and testing. For the new flag, this change switches to using a UINT Flags field to make flags extensible without breaking the table format. For the version, a UINT MinShaderModel will be the encoded version format used elsewhere: (Major << 4) | (Minor & 0xF) Commented code for using the MinShaderModel is provided for when a subsequent change will implement the availability attribute checks. --- include/dxc/dxcapi.internal.h | 11 ++- tools/clang/lib/Sema/SemaHLSL.cpp | 15 +++- tools/clang/unittests/HLSL/ExtensionTest.cpp | 77 +++++++++++--------- utils/hct/hctdb.py | 29 ++++++++ utils/hct/hctdb_instrhelp.py | 18 ++++- 5 files changed, 106 insertions(+), 44 deletions(-) diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index b0f9a467a4..4b8e237201 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -160,11 +160,16 @@ struct HLSL_INTRINSIC_ARGUMENT { // matching input constraints. 
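// Quick worked example of the encoding described in this commit message (not
// part of the patch): MinShaderModel packs the version as
// (Major << 4) | (Minor & 0xF), so shader model 6.9 is (6 << 4) | 9 == 0x69,
// 6.0 is 0x60, and 0 still means "no minimum". Decoding reverses it:
// Major = Encoded >> 4, Minor = Encoded & 0xF, which is what the commented-out
// availability sketch added to SemaHLSL.cpp below does.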
}; +// HLSL_INTRINSIC flags +static const UINT INTRIN_FLAG_READ_ONLY = 1U << 0; +static const UINT INTRIN_FLAG_READ_NONE = 1U << 1; +static const UINT INTRIN_FLAG_IS_WAVE = 1U << 2; + struct HLSL_INTRINSIC { UINT Op; // Intrinsic Op ID - BOOL bReadOnly; // Only read memory - BOOL bReadNone; // Not read memory - BOOL bIsWave; // Is a wave-sensitive op + UINT Flags; // INTRIN_FLAG_* flags + UINT MinShaderModel; // Encoded minimum shader model, 0 = no minimum + // (Major << 4) + (Minor & 0xf) INT iOverloadParamIndex; // Parameter decide the overload type, -1 means ret // type UINT uNumArgs; // Count of arguments in pArgs. diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 2d1873fd55..c41e899278 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -1810,12 +1810,21 @@ static void AddHLSLIntrinsicAttr(FunctionDecl *FD, ASTContext &context, } FD->addAttr( HLSLIntrinsicAttr::CreateImplicit(context, tableName, lowering, opcode)); - if (pIntrinsic->bReadNone) + if (pIntrinsic->Flags & INTRIN_FLAG_READ_NONE) FD->addAttr(ConstAttr::CreateImplicit(context)); - if (pIntrinsic->bReadOnly) + if (pIntrinsic->Flags & INTRIN_FLAG_READ_ONLY) FD->addAttr(PureAttr::CreateImplicit(context)); - if (pIntrinsic->bIsWave) + if (pIntrinsic->Flags & INTRIN_FLAG_IS_WAVE) FD->addAttr(HLSLWaveSensitiveAttr::CreateImplicit(context)); + // TBD: Add availability attribute if MinShaderModel is set. + // if (pIntrinsic->MinShaderModel) { + // unsigned Major = pIntrinsic->MinShaderModel >> 4; + // unsigned Minor = pIntrinsic->MinShaderModel & 0xF; + // FD->addAttr(AvailabilityAttr::CreateImplicit( + // context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), + // clang::VersionTuple(), clang::VersionTuple(), false, + // "HLSL Intrinsic availability limited by shader model.")); + //} } static FunctionDecl * diff --git a/tools/clang/unittests/HLSL/ExtensionTest.cpp b/tools/clang/unittests/HLSL/ExtensionTest.cpp index 51dda5533c..65407291ca 100644 --- a/tools/clang/unittests/HLSL/ExtensionTest.cpp +++ b/tools/clang/unittests/HLSL/ExtensionTest.cpp @@ -204,79 +204,86 @@ Intrinsic Intrinsics[] = { {L"test_fn", DEFAULT_NAME, "r", - {1, false, true, false, -1, countof(TestFnArgs), TestFnArgs}}, + {1, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnArgs), TestFnArgs}}, {L"test_proc", DEFAULT_NAME, "r", - {2, false, false, false, -1, countof(TestProcArgs), TestProcArgs}}, + {2, 0, 0, -1, countof(TestProcArgs), TestProcArgs}}, {L"test_poly", "test_poly.$o", "r", - {3, false, true, false, -1, countof(TestFnCustomArgs), TestFnCustomArgs}}, + {3, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnCustomArgs), + TestFnCustomArgs}}, {L"test_int", "test_int", "r", - {4, false, true, false, -1, countof(TestFnIntArgs), TestFnIntArgs}}, + {4, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnIntArgs), TestFnIntArgs}}, {L"test_nolower", "test_nolower.$o", "n", - {5, false, true, false, -1, countof(TestFnNoLowerArgs), + {5, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnNoLowerArgs), TestFnNoLowerArgs}}, {L"test_pack_0", "test_pack_0.$o", "p", - {6, false, false, false, -1, countof(TestFnPack0), TestFnPack0}}, + {6, 0, 0, -1, countof(TestFnPack0), TestFnPack0}}, {L"test_pack_1", "test_pack_1.$o", "p", - {7, false, true, false, -1, countof(TestFnPack1), TestFnPack1}}, + {7, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack1), TestFnPack1}}, {L"test_pack_2", "test_pack_2.$o", "p", - {8, false, true, false, -1, countof(TestFnPack2), TestFnPack2}}, + {8, INTRIN_FLAG_READ_NONE, 0, -1, 
countof(TestFnPack2), TestFnPack2}}, {L"test_pack_3", "test_pack_3.$o", "p", - {9, false, true, false, -1, countof(TestFnPack3), TestFnPack3}}, + {9, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack3), TestFnPack3}}, {L"test_pack_4", "test_pack_4.$o", "p", - {10, false, false, false, -1, countof(TestFnPack4), TestFnPack4}}, + {10, 0, 0, -1, countof(TestFnPack4), TestFnPack4}}, {L"test_rand", "test_rand", "r", - {11, false, false, false, -1, countof(TestRand), TestRand}}, + {11, 0, 0, -1, countof(TestRand), TestRand}}, {L"test_isinf", "test_isinf", "d", - {13, true, true, false, -1, countof(TestIsInf), TestIsInf}}, + {13, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIsInf), TestIsInf}}, {L"test_ibfe", "test_ibfe", "d", - {14, true, true, false, -1, countof(TestIBFE), TestIBFE}}, + {14, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIBFE), TestIBFE}}, // Make this intrinsic have the same opcode as an hlsl intrinsic with an // unsigned counterpart for testing purposes. {L"test_unsigned", "test_unsigned", "n", - {static_cast(hlsl::IntrinsicOp::IOP_min), false, true, false, -1, - countof(TestUnsigned), TestUnsigned}}, + {static_cast(hlsl::IntrinsicOp::IOP_min), INTRIN_FLAG_READ_NONE, + 0, -1, countof(TestUnsigned), TestUnsigned}}, {L"wave_proc", DEFAULT_NAME, "r", - {16, false, true, true, -1, countof(WaveProcArgs), WaveProcArgs}}, + {16, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(WaveProcArgs), WaveProcArgs}}, {L"test_o_1", "test_o_1.$o:1", "r", - {18, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {18, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_2", "test_o_2.$o:2", "r", - {19, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {19, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_3", "test_o_3.$o:3", "r", - {20, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {20, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, // custom lowering with both optional arguments and vector exploding. 
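// Hypothetical entry, not in the test: with the widened table layout, an
// extension intrinsic that wanted to require shader model 6.9 would put the
// encoded version in the field these entries all leave at 0, e.g.
//   {L"test_sm69", "test_sm69", "r",
//    {22, INTRIN_FLAG_READ_NONE, 0x69, -1, countof(TestFnArgs), TestFnArgs}},
// (opcode 22 and the reuse of TestFnArgs are illustrative only). The comment
// lines that follow resume describing the CustomLoadOp lowering arguments.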
// Arg 0 = Opcode // Arg 1 = Pass as is @@ -286,16 +293,17 @@ Intrinsic Intrinsics[] = { {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOp), TestCustomLoadOp}}, + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOp), + TestCustomLoadOp}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpBool), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpBool), TestCustomLoadOpBool}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpSubscript), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpSubscript), TestCustomLoadOpSubscript}}, }; @@ -303,7 +311,8 @@ Intrinsic BufferIntrinsics[] = { {L"MyBufferOp", "MyBufferOp", "m", - {12, false, true, false, -1, countof(TestMyBufferOp), TestMyBufferOp}}, + {12, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyBufferOp), + TestMyBufferOp}}, }; // Test adding a method to an object that normally has no methods (SamplerState @@ -312,7 +321,8 @@ Intrinsic SamplerIntrinsics[] = { {L"MySamplerOp", "MySamplerOp", "m", - {15, false, true, false, -1, countof(TestMySamplerOp), TestMySamplerOp}}, + {15, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMySamplerOp), + TestMySamplerOp}}, }; // Define a lowering string to target a common dxil extension operation defined @@ -345,12 +355,12 @@ Intrinsic Texture1DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_0), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_0), TestMyTexture1DOp_0}}, {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_1), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_1), TestMyTexture1DOp_1}}, }; @@ -358,7 +368,7 @@ Intrinsic Texture2DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture2DOp), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture2DOp), TestMyTexture2DOp}}, }; @@ -1497,8 +1507,8 @@ TEST_F(ExtensionTest, EvalAttributeCollision) { Intrinsic Intrinsic = {L"collide_proc", "collide_proc", "r", - {static_cast(op), true, false, false, -1, - countof(Args), Args}}; + {static_cast(op), INTRIN_FLAG_READ_ONLY, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1532,10 +1542,8 @@ TEST_F(ExtensionTest, NoUnwind) { IA_C}, {"value", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, 1, IA_C}}; - Intrinsic Intrinsic = {L"test_proc", - "test_proc", - "r", - {1, false, false, false, -1, countof(Args), Args}}; + Intrinsic Intrinsic = { + L"test_proc", "test_proc", "r", {1, 0, 0, -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1572,7 +1580,8 @@ TEST_F(ExtensionTest, DCE) { Intrinsic Intrinsic = {L"test_proc", "test_proc", "r", - {1, true, true, false, -1, countof(Args), Args}}; + {1, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 66376c3b9b..1c3fd0f717 100644 --- a/utils/hct/hctdb.py +++ 
b/utils/hct/hctdb.py @@ -8208,6 +8208,7 @@ def __init__( unsigned_op, overload_idx, hidden, + min_shader_model, ): self.name = name # Function name self.idx = idx # Unique number within namespace @@ -8235,6 +8236,12 @@ def __init__( overload_idx # Parameter determines the overload type, -1 means ret type ) self.hidden = hidden # Internal high-level op, not exposed to HLSL + # Encoded minimum shader model for this intrinsic + self.min_shader_model = 0 + if min_shader_model: + self.min_shader_model = (min_shader_model[0] << 4) | ( + min_shader_model[1] & 0x0F + ) self.key = ( ("%3d" % ns_idx) + "!" @@ -8612,6 +8619,7 @@ def process_attr(attr): -1 ) # Parameter determines the overload type, -1 means ret type. hidden = False + min_shader_model = (0, 0) for a in attrs: if a == "": continue @@ -8644,6 +8652,24 @@ def process_attr(attr): if d == "overload": overload_param_index = int(v) continue + if d == "min_sm": + # min_sm is a string like "6.0" or "6.5" + # Convert to a tuple of integers (major, minor) + try: + major_minor = v.split(".") + if len(major_minor) != 2: + raise ValueError + major, minor = major_minor + major = int(major) + minor = int(minor) + # minor of 15 has special meaning, and larger values + # cannot be encoded in the version DWORD. + if major < 0 or minor < 0 or minor > 14: + raise ValueError + min_shader_model = (major, minor) + except ValueError: + assert False, "invalid min_sm: %s" % (v) + continue assert False, "invalid attr %s" % (a) return ( @@ -8654,6 +8680,7 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, ) current_namespace = None @@ -8701,6 +8728,7 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, ) = process_attr(attr) # Add an entry for this intrinsic. if bracket_cleanup_re.search(opts): @@ -8739,6 +8767,7 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, ) ) num_entries += 1 diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 17eefd4918..353f8f9634 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -989,13 +989,23 @@ def get_hlsl_intrinsics(): result += "#ifdef ENABLE_SPIRV_CODEGEN\n\n" # SPIRV Change Ends arg_idx = 0 - ns_table += " {(UINT)%s::%s_%s, %s, %s, %s, %d, %d, g_%s_Args%s},\n" % ( + flags = [] + if i.readonly: + flags.append("INTRIN_FLAG_READ_ONLY") + if i.readnone: + flags.append("INTRIN_FLAG_READ_NONE") + if i.wave: + flags.append("INTRIN_FLAG_IS_WAVE") + if flags: + flags = " | ".join(flags) + else: + flags = "0" + ns_table += " {(UINT)%s::%s_%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( opcode_namespace, id_prefix, i.name, - str(i.readonly).lower(), - str(i.readnone).lower(), - str(i.wave).lower(), + flags, + i.min_shader_model, i.overload_param_index, len(i.params), last_ns, From 24dedfde13cfe6bdcf6206a1ce00bbaf584a90dd Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 14 Mar 2025 02:24:39 -0700 Subject: [PATCH 27/88] [OMM] Implement front end diagnostics for OMM, including on TraceRayInline, and add Availability Attributes (#7156) This PR addresses the front end part of OMM, defining the new flags defined in the spec, and implementing the relevant diagnostics should the flags be incompatible. It also adds the second template argument to the RayQuery object, which is set to have a default value of 0 if no explicit template argument is provided. 
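A rough sketch of the flag rule this enables, not the actual compiler code
(the real checks are in the SemaHLSL.cpp and SemaHLSLDiagnoseTU.cpp diffs of
this patch); ForceOMM2State is 0x400 per DxilConstants.h, while the
RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS value used here is an assumption:

    // Returns true when warn_hlsl_rayquery_flags_conflict should fire.
    static bool ConflictingOMMFlags(unsigned RayFlags, unsigned RayQueryFlags) {
      const unsigned ForceOMM2State = 0x400;       // DXIL::RayFlag::ForceOMM2State
      const unsigned AllowOpacityMicromaps = 0x1;  // assumed RAYQUERY_FLAG_* value
      return (RayFlags & ForceOMM2State) != 0 &&
             (RayQueryFlags & AllowOpacityMicromaps) == 0;
    }

A non-zero RayQueryFlags template argument additionally requires shader model
6.9 or later, which is what warn_hlsl_rayquery_flags_disallowed below reports.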
Fixes #7145 --------- Co-authored-by: github-actions[bot] --- include/dxc/DXIL/DxilConstants.h | 2 +- tools/clang/include/clang/Basic/Attr.td | 7 + .../include/clang/Basic/DiagnosticGroups.td | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 9 + tools/clang/lib/AST/ASTContextHLSL.cpp | 79 +++++--- .../lib/CodeGen/CGHLSLMSFinishCodeGen.cpp | 7 +- tools/clang/lib/Sema/SemaHLSL.cpp | 89 ++++++++- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 171 +++++++++++++++--- .../SemaHLSL/rayquery-ast-dump-implicit.hlsl | 14 ++ .../test/SemaHLSL/rayquery-ast-dump.hlsl | 26 +++ .../rayquery-omm-diag-TU-export-sm65.hlsl | 11 ++ .../rayquery-omm-diag-TU-sm65-warnings.hlsl | 11 ++ .../SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl | 46 +++++ .../test/SemaHLSL/rayquery-omm-diag-sm65.hlsl | 22 +++ .../test/SemaHLSL/rayquery-omm-type-diag.hlsl | 8 + 15 files changed, 447 insertions(+), 57 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index b3c510a038..54131f3948 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1827,7 +1827,7 @@ enum class RayFlag : uint32_t { CullNonOpaque = 0x80, SkipTriangles = 0x100, SkipProceduralPrimitives = 0x200, - ForceOMM2State = 0x400, // Force 2-state in Opacity Micromaps + ForceOMM2State = 0x400 }; // Corresponds to RAYQUERY_FLAG_* in HLSL diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 9e48df51fd..3afbaa91c7 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -1149,6 +1149,13 @@ def HLSLNodeObject : InheritableAttr { }]; } +// HLSL Ray Query Attribute + +def HLSLRayQueryObject : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} // HLSL Parameter Attributes diff --git a/tools/clang/include/clang/Basic/DiagnosticGroups.td b/tools/clang/include/clang/Basic/DiagnosticGroups.td index 39618aed04..ff21b34652 100644 --- a/tools/clang/include/clang/Basic/DiagnosticGroups.td +++ b/tools/clang/include/clang/Basic/DiagnosticGroups.td @@ -799,10 +799,12 @@ def HLSLPayloadAccessQualifer: DiagGroup<"payload-access-qualifier", [ HLSLPayloadAccessQualiferPerf, HLSLPayloadAccessQualiferCall ]>; +def HLSLRayQueryFlags : DiagGroup<"hlsl-rayquery-flags">; def HLSLSemanticIdentifierCollision : DiagGroup<"semantic-identifier-collision">; def HLSLStructurizeExitsLifetimeMarkersConflict: DiagGroup<"structurize-exits-lifetime-markers-conflict">; def HLSLParameterUsage : DiagGroup<"parameter-usage">; def HLSLAvailability: DiagGroup<"hlsl-availability">; +def HLSLAvailabilityConstant: DiagGroup<"hlsl-availability-constant">; def HLSLBarrier : DiagGroup<"hlsl-barrier">; def HLSLLegacyLiterals : DiagGroup<"hlsl-legacy-literal">; // HLSL Change Ends diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 8d428073bd..b8a772b3a8 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7652,8 +7652,17 @@ def err_payload_fields_is_payload_and_overqualified : Error< "payload field '%0' is a payload struct. Payload access qualifiers are not allowed on payload types.">; def warn_hlsl_payload_qualifer_dropped : Warning< "payload access qualifiers ignored. These are only supported for lib_6_7+ targets and lib_6_6 with with the -enable-payload-qualifiers flag.">, InGroup; +def warn_hlsl_rayquery_flags_disallowed : Warning< + "A non-zero value for the RayQueryFlags template argument requires" + " shader model 6.9 or above.">, DefaultError, InGroup; +def warn_hlsl_rayquery_flags_conflict : Warning< + "When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags" + " must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.">, DefaultError, InGroup; def err_hlsl_unsupported_builtin_op: Error< "operator cannot be used with built-in type %0">; +def warn_hlsl_builtin_constant_unavailable: Warning< + "potential misuse of built-in constant %0 in shader model %1; introduced" + " in shader model %2">, InGroup; def err_hlsl_unsupported_char_literal : Error< "unsupported style of char literal - use a single-character char-based literal">; def err_hlsl_unsupported_clipplane_argument_expression : Error< diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 870d032d39..1b6c346acd 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -545,10 +545,19 @@ hlsl::DeclareRecordTypeWithHandle(ASTContext &context, StringRef name, return typeDeclBuilder.getRecordDecl(); } +AvailabilityAttr *ConstructAvailabilityAttribute(clang::ASTContext &context, + VersionTuple Introduced) { + AvailabilityAttr *AAttr = AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(6, 9), + clang::VersionTuple(), clang::VersionTuple(), false, ""); + return AAttr; +} + // creates a global static constant unsigned integer with value. 
// equivalent to: static const uint name = val; static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, - StringRef name, unsigned val) { + StringRef name, unsigned val, + AvailabilityAttr *AAttr = nullptr) { IdentifierInfo &Id = context.Idents.get(name, tok::TokenKind::identifier); QualType type = context.getConstType(context.UnsignedIntTy); VarDecl *varDecl = VarDecl::Create(context, DC, NoLoc, NoLoc, &Id, type, @@ -558,6 +567,9 @@ static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, context, llvm::APInt(context.getIntWidth(type), val), type, NoLoc); varDecl->setInit(exprVal); varDecl->setImplicit(true); + if (AAttr) + varDecl->addAttr(AAttr); + DC->addDecl(varDecl); } @@ -570,6 +582,7 @@ static void AddConstUInt(clang::ASTContext &context, StringRef name, struct Enumerant { StringRef name; unsigned value; + AvailabilityAttr *avail = nullptr; }; static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, @@ -585,33 +598,45 @@ static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, enumDecl->setImplicit(true); // static const uint = ; for (const Enumerant &enumerant : enumerants) { - AddConstUInt(context, curDC, enumerant.name, enumerant.value); + AddConstUInt(context, curDC, enumerant.name, enumerant.value, + enumerant.avail); } } /// Adds all constants and enums for ray tracing void hlsl::AddRaytracingConstants(ASTContext &context) { + + // Create aversion tuple for availability attributes + // for the RAYQUERY_FLAG enum + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "RAY_FLAG", - { - {"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, - {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, - {"RAY_FLAG_FORCE_NON_OPAQUE", - (unsigned)DXIL::RayFlag::ForceNonOpaque}, - {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", - (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, - {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", - (unsigned)DXIL::RayFlag::SkipClosestHitShader}, - {"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, - {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, - {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, - {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, - {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, - {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", - (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, - }); + {{"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, + {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, + {"RAY_FLAG_FORCE_NON_OPAQUE", (unsigned)DXIL::RayFlag::ForceNonOpaque}, + {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", + (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, + {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", + (unsigned)DXIL::RayFlag::SkipClosestHitShader}, + {"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, + {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, + {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, + {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, + {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, + {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", + (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, + {"RAY_FLAG_FORCE_OMM_2_STATE", (unsigned)DXIL::RayFlag::ForceOMM2State, + ConstructAvailabilityAttribute(context, VT69)}}); + + AddTypedefPseudoEnum( + context, "RAYQUERY_FLAG", + 
{{"RAYQUERY_FLAG_NONE", (unsigned)DXIL::RayQueryFlag::None}, + {"RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS", + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)}}); AddTypedefPseudoEnum( context, "COMMITTED_STATUS", @@ -1161,7 +1186,14 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { // template RayQuery { ... } BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), "RayQuery"); - typeDeclBuilder.addIntegerTemplateParam("flags", context.UnsignedIntTy); + typeDeclBuilder.addIntegerTemplateParam("constRayFlags", + context.UnsignedIntTy); + // create an optional second template argument with default value + // that contains the value of DXIL::RayFlag::None + llvm::Optional DefaultRayQueryFlag = + static_cast(DXIL::RayFlag::None); + typeDeclBuilder.addIntegerTemplateParam( + "RayQueryFlags", context.UnsignedIntTy, DefaultRayQueryFlag); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. @@ -1178,7 +1210,8 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { context.DeclarationNames.getCXXConstructorName(canQualType), false, &pConstructorDecl, &pTypeSourceInfo); typeDeclBuilder.getRecordDecl()->addDecl(pConstructorDecl); - + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLRayQueryObjectAttr::CreateImplicit(context)); return typeDeclBuilder.getRecordDecl(); } diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 8af96cc3cd..16f268f102 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -2839,8 +2839,11 @@ void TranslateRayQueryConstructor(HLModule &HLM) { HLM.GetTypeSystem().GetStructAnnotation(pRQType); DXASSERT(SA, "otherwise, could not find type annoation for RayQuery " "specialization"); - DXASSERT(SA->GetNumTemplateArgs() == 1 && - SA->GetTemplateArgAnnotation(0).IsIntegral(), + DXASSERT((SA->GetNumTemplateArgs() == 1 && + SA->GetTemplateArgAnnotation(0).IsIntegral()) || + (SA->GetNumTemplateArgs() == 2 && + SA->GetTemplateArgAnnotation(0).IsIntegral() && + SA->GetTemplateArgAnnotation(1).IsIntegral()), "otherwise, RayQuery has changed, or lacks template args"); llvm::IRBuilder<> Builder(CI); llvm::Value *rayFlags = diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index c41e899278..031e49408f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -3992,13 +3992,6 @@ class HLSLExternalSource : public ExternalSemaSource { return IsSubobjectBasicKind(GetTypeElementKind(type)); } - bool IsRayQueryBasicKind(ArBasicKind kind) { - return kind == AR_OBJECT_RAY_QUERY; - } - bool IsRayQueryType(QualType type) { - return IsRayQueryBasicKind(GetTypeElementKind(type)); - } - void WarnMinPrecision(QualType Type, SourceLocation Loc) { Type = Type->getCanonicalTypeUnqualified(); if (IsVectorType(m_sema, Type) || IsMatrixType(m_sema, Type)) { @@ -5326,6 +5319,39 @@ class HLSLExternalSource : public ExternalSemaSource { return true; } return false; + } else if (Template->getTemplatedDecl() + ->hasAttr()) { + int numArgs = TemplateArgList.size(); + DXASSERT(numArgs == 1 || numArgs == 2, + "otherwise the template has not been declared properly"); + + // first, determine if the rayquery flag AllowOpacityMicromaps is set + bool HasRayQueryFlagAllowOpacityMicromaps = false; + if (numArgs > 1) { + const TemplateArgument &Arg2 = 
TemplateArgList[1].getArgument(); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, m_sema->getASTContext()); + if (Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps) + HasRayQueryFlagAllowOpacityMicromaps = true; + } + + // next, get the first template argument, to check if + // the ForceOMM2State flag is set + const TemplateArgument &Arg1 = TemplateArgList[0].getArgument(); + Expr *Expr1 = Arg1.getAsExpr(); + llvm::APSInt Arg1val; + bool HasRayFlagForceOMM2State = + Expr1->isIntegerConstantExpr(Arg1val, m_sema->getASTContext()) && + (Arg1val.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; + + // finally, if ForceOMM2State is set and AllowOpacityMicromaps + // isn't, emit a warning + if (HasRayFlagForceOMM2State && !HasRayQueryFlagAllowOpacityMicromaps) + m_sema->Diag(TemplateArgList[0].getLocation(), + diag::warn_hlsl_rayquery_flags_conflict); } else if (Template->getTemplatedDecl()->hasAttr()) { DXASSERT(TemplateArgList.size() > 0, "Tessellation patch should have at least one template args"); @@ -11568,6 +11594,52 @@ static void DiagnoseReachableBarrier(Sema &S, CallExpr *CE, } } +bool IsRayFlagForceOMM2StateSet(Sema &sema, const CallExpr *CE) { + const Expr *Expr1 = CE->getArg(1); + llvm::APSInt constantResult; + return Expr1->isIntegerConstantExpr(constantResult, sema.getASTContext()) && + (constantResult.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; +} + +void DiagnoseTraceRayInline(Sema &sema, CallExpr *callExpr) { + // Validate if the RayFlag parameter has RAY_FLAG_FORCE_OMM_2_STATE set, + // the RayQuery decl must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set, + // otherwise emit a diagnostic. + if (IsRayFlagForceOMM2StateSet(sema, callExpr)) { + CXXMemberCallExpr *CXXCallExpr = dyn_cast(callExpr); + if (!CXXCallExpr) { + return; + } + const DeclRefExpr *DRE = + dyn_cast(CXXCallExpr->getImplicitObjectArgument()); + assert(DRE); + QualType QT = DRE->getType(); + auto *typeRecordDecl = QT->getAsCXXRecordDecl(); + ClassTemplateSpecializationDecl *SpecDecl = + llvm::dyn_cast(typeRecordDecl); + + if (!SpecDecl) + return; + + // Guaranteed 2 arguments since the rayquery constructor + // automatically creates 2 template args + DXASSERT(SpecDecl->getTemplateArgs().size() == 2, + "else rayquery constructor template args are not 2"); + llvm::APSInt Arg2val = SpecDecl->getTemplateArgs()[1].getAsIntegral(); + bool IsRayQueryAllowOMMSet = + Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps; + if (!IsRayQueryAllowOMMSet) { + // Diagnose the call + sema.Diag(CXXCallExpr->getExprLoc(), + diag::warn_hlsl_rayquery_flags_conflict); + sema.Diag(DRE->getDecl()->getLocation(), diag::note_previous_decl) + << "RayQueryFlags"; + } + } +} + static bool isStringLiteral(QualType type) { if (!type->isConstantArrayType()) return false; @@ -11612,6 +11684,9 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, DiagnoseReachableBarrier(*this, CE, SM, EntrySK, NodeLaunchTy, EntryDecl, Diags); break; + case hlsl::IntrinsicOp::MOP_TraceRayInline: + DiagnoseTraceRayInline(*this, CE); + break; default: break; } diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index feefd4f625..827798a852 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -10,6 +10,7 @@ 
/////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" #include "clang/AST/ASTContext.h" @@ -18,6 +19,8 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaHLSL.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" #include @@ -142,13 +145,21 @@ class CallGraphWithRecurseGuard { } public: - void BuildForEntry(FunctionDecl *EntryFnDecl) { + void BuildForEntry(FunctionDecl *EntryFnDecl, + llvm::ArrayRef GlobalsWithInit) { DXASSERT_NOMSG(EntryFnDecl); EntryFnDecl = getFunctionWithBody(EntryFnDecl); PendingFunctions pendingFunctions; FnReferenceVisitor visitor(m_visitedFunctions, pendingFunctions, m_callNodes); - pendingFunctions.push_back(EntryFnDecl); + + // First, traverse all initializers, then entry function. + m_visitedFunctions.insert(EntryFnDecl); + visitor.setSourceFn(EntryFnDecl); + for (VarDecl *VD : GlobalsWithInit) + visitor.TraverseDecl(VD); + visitor.TraverseDecl(EntryFnDecl); + while (!pendingFunctions.empty()) { FunctionDecl *pendingDecl = pendingFunctions.pop_back_val(); if (m_visitedFunctions.insert(pendingDecl).second == true) { @@ -284,33 +295,56 @@ std::vector GetAllExportedFDecls(clang::Sema *self) { return AllExportedFDecls; } +void GatherGlobalsWithInitializers( + DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit) { + for (auto *D : DC->decls()) { + // Skip built-ins and function decls. + if (D->isImplicit() || isa(D)) + continue; + if (auto *VD = dyn_cast(D)) { + // Add if user-defined static or groupshared global with initializer. + if (VD->hasInit() && VD->hasGlobalStorage() && + (VD->getStorageClass() == SC_Static || + VD->hasAttr())) + GlobalsWithInit.push_back(VD); + } else if (auto *DC = dyn_cast(D)) { + // Recurse into DeclContexts like namespace, cbuffer, class/struct, etc. + GatherGlobalsWithInitializers(DC, GlobalsWithInit); + } + } +} + // in the non-library case, this function will be run only once, // but in the library case, this function will be run for each // viable top-level function declaration by // ValidateNoRecursionInTranslationUnit. // (viable as in, is exported) -clang::FunctionDecl *ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, - clang::FunctionDecl *FD) { +clang::FunctionDecl * +ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, + clang::FunctionDecl *FD, + llvm::ArrayRef GlobalsWithInit) { // Validate that there is no recursion reachable by this function declaration // NOTE: the information gathered here could be used to bypass code generation // on functions that are unreachable (as an early form of dead code // elimination). 
if (FD) { - callGraph.BuildForEntry(FD); + callGraph.BuildForEntry(FD, GlobalsWithInit); return callGraph.CheckRecursion(FD); } return nullptr; } -class HLSLCallDiagnoseVisitor +class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor : public RecursiveASTVisitor { public: explicit HLSLCallDiagnoseVisitor( Sema *S, const hlsl::ShaderModel *SM, DXIL::ShaderKind EntrySK, DXIL::NodeLaunchType NodeLaunchTy, const FunctionDecl *EntryDecl, - llvm::SmallPtrSetImpl &DiagnosedCalls) + llvm::SmallPtrSetImpl &DiagnosedCalls, + llvm::SmallPtrSetImpl &DeclAvailabilityChecked) : sema(S), SM(SM), EntrySK(EntrySK), NodeLaunchTy(NodeLaunchTy), - EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls) {} + EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls), + DeclAvailabilityChecked(DeclAvailabilityChecked) {} bool VisitCallExpr(CallExpr *CE) { // Set flag if already diagnosed from another entry, allowing some @@ -325,6 +359,86 @@ class HLSLCallDiagnoseVisitor return true; } + bool VisitVarDecl(VarDecl *VD) { + QualType VarType = VD->getType(); + if (const TemplateSpecializationType *TST = + dyn_cast(VarType.getTypePtr())) { + const TemplateDecl *TD = TST->getTemplateName().getAsTemplateDecl(); + if (!TD) + return true; + + // verify this is a rayquery decl + if (TD->getTemplatedDecl()->hasAttr()) { + if (TST->getNumArgs() == 1) { + return true; + } + // now guaranteed 2 args + const TemplateArgument &Arg2 = TST->getArg(1); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, sema->getASTContext()); + + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + sema->getLangOpts().HLSLProfile.c_str()); + + if (Arg2val.getZExtValue() != 0 && !SM->IsSMAtLeast(6, 9)) { + // if it's an integer literal, emit + // warn_hlsl_rayquery_flags_disallowed + if (Arg2.getKind() == TemplateArgument::Expression) { + if (auto *castExpr = dyn_cast( + Arg2.getAsExpr()->IgnoreParens())) { + // Now check if the sub-expression is a DeclRefExpr + Expr *subExpr = castExpr->getSubExpr(); + if (auto *IL = dyn_cast(subExpr)) + sema->Diag(VD->getLocStart(), + diag::warn_hlsl_rayquery_flags_disallowed); + return true; + } + } + } + } + } + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + // Diagnose availability for referenced decl. + if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(DRE)) { + NamedDecl *ND = DRE->getDecl(); + DiagnoseAvailability(AAttr, ND, DRE->getExprLoc()); + } + + return true; + } + + AvailabilityAttr *GetAvailabilityAttrOnce(DeclRefExpr *DRE) { + AvailabilityAttr *AAttr = DRE->getDecl()->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Decl. + if (!DeclAvailabilityChecked.insert(DRE).second) + return nullptr; + + return AAttr; + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + + // if the current shader model is lower than what + // is stated in the availability attribute, emit + // the availability warning. + + if (SMVT < AAttrVT) { + // TBD: Determine best way to distinguish between builtin constant decls + // and other decls. 
+ sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) + << ND << SM->GetName() << AAttrVT.getAsString(); + } + } + clang::Sema *getSema() { return sema; } private: @@ -334,6 +448,7 @@ class HLSLCallDiagnoseVisitor DXIL::NodeLaunchType NodeLaunchTy; const FunctionDecl *EntryDecl; llvm::SmallPtrSetImpl &DiagnosedCalls; + llvm::SmallPtrSetImpl &DeclAvailabilityChecked; }; std::optional @@ -428,18 +543,26 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { const auto *shaderModel = hlsl::ShaderModel::GetByName(self->getLangOpts().HLSLProfile.c_str()); - std::set DiagnosedDecls; + llvm::SmallVector GlobalsWithInit; + GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), + GlobalsWithInit); + + std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; + llvm::SmallPtrSet DeclAvailabilityChecked; // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; - FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl); + ArrayRef InitGlobals = {}; + // if entry function, include globals with initializers. + if (FDecl->hasAttr()) + InitGlobals = GlobalsWithInit; + FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl, InitGlobals); if (result) { // don't emit duplicate diagnostics for the same recursive function // if A and B call recursive function C, only emit 1 diagnostic for C. - if (DiagnosedDecls.find(result) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(result); + if (DiagnosedRecursiveDecls.insert(result).second) { self->Diag(result->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << FDecl->getQualifiedNameAsString() @@ -463,12 +586,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } if (pPatchFnDecl) { - FunctionDecl *patchResult = ValidateNoRecursion(callGraph, pPatchFnDecl); + FunctionDecl *patchResult = + ValidateNoRecursion(callGraph, pPatchFnDecl, GlobalsWithInit); // In this case, recursion was detected in the patch-constant function if (patchResult) { - if (DiagnosedDecls.find(patchResult) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(patchResult); + if (DiagnosedRecursiveDecls.insert(patchResult).second) { self->Diag(patchResult->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << pPatchFnDecl->getQualifiedNameAsString() @@ -482,15 +605,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { // disconnected with respect to the call graph. // Only check this if neither function decl is recursive if (!result && !patchResult) { - CallGraphWithRecurseGuard CG; - CG.BuildForEntry(pPatchFnDecl); - if (CG.CheckReachability(pPatchFnDecl, FDecl)) { + if (callGraph.CheckReachability(pPatchFnDecl, FDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 1 << FDecl->getName() << 0 << pPatchFnDecl->getName(); } - CG.BuildForEntry(FDecl); - if (CG.CheckReachability(FDecl, pPatchFnDecl)) { + if (callGraph.CheckReachability(FDecl, pPatchFnDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 0 << pPatchFnDecl->getName() << 1 << FDecl->getName(); @@ -553,10 +673,13 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } // Visit all visited functions in call graph to collect illegal intrinsic // calls. 
- for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) { - HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, - FDecl, DiagnosedCalls); + HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, + FDecl, DiagnosedCalls, + DeclAvailabilityChecked); + // Visit globals with initializers when processing entry point. + for (VarDecl *VD : InitGlobals) + Visitor.TraverseDecl(VD); + for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) Visitor.TraverseDecl(FD); - } } } diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl new file mode 100644 index 0000000000..55b4623725 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump-implicit %s | FileCheck %s + +float main(RayDesc rayDesc : RAYDESC) : OUT { + return 0; +} + +// CHECK: VarDecl 0x{{.+}} <> implicit RAY_FLAG_FORCE_OMM_2_STATE 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +// CHECK: VarDecl 0x{{.+}} <> implicit RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl new file mode 100644 index 0000000000..2ec79a060f --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump %s | FileCheck %s + +RaytracingAccelerationStructure RTAS; + + +float main(RayDesc rayDesc : RAYDESC) : OUT { + RayQuery<0, RAYQUERY_FLAG_NONE> rayQuery1; + RayQuery rayQuery2; + rayQuery1.TraceRayInline(RTAS, 1, 2, rayDesc); + rayQuery2.TraceRayInline(RTAS, RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + return 0; +} + +// CHECK: -DeclStmt 0x{{.+}} +// CHECK-NEXT: `-VarDecl 0x{{.+}} used rayQuery1 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' 'void ()' +// CHECK-NEXT: -DeclStmt 0x{{.+}} +// CHECK-NEXT: `-VarDecl 0x{{.+}} used rayQuery2 'RayQuery':'RayQuery<1024, 1>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' 'void ()' +// CHECK-NEXT: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' lvalue Var 0x{{.+}} 'rayQuery1' 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' + +// CHECK: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' lvalue Var 0x{{.+}} 'rayQuery2' 'RayQuery':'RayQuery<1024, 1>' diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl new file mode 100644 index 0000000000..3e2031e0a7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T lib_6_5 -verify %s + +// expect no diagnostics here, since global variables +// are not picked up through the recursive AST visitor's +// traversal of the exported function. 
+int x = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +export float4 MyExportedFunction(float4 color) { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_5; introduced in shader model 6.9}} + return color * RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl new file mode 100644 index 0000000000..476c1a503e --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -Wno-error-hlsl-rayquery-flags -Wno-error-hlsl-availability -T vs_6_5 -E main -verify %s + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + // expected-warning@+3{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0a; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl new file mode 100644 index 0000000000..6904f58c7d --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// tests that RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS usage will emit +// one warning for each incompatible availability attribute decl, +// when the compilation target is less than shader model 6.9. 
+ +namespace MyNamespace { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + static const int badVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +// expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} +groupshared const int otherBadVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +int retNum(){ + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +int retNumUncalled(){ + // no diagnostic expected here + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + int x = MyNamespace::badVar + otherBadVar + retNum(); + RayQuery<0> rayQuery0a; + + if (x > 4){ + rayQuery0a.TraceRayInline(RTAS, 8, 2, rayDesc); + } + else{ + rayQuery0a.TraceRayInline(RTAS, 16, 2, rayDesc); + } + + // expected-error@+2{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0b; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0d; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl new file mode 100644 index 0000000000..d31d9bf289 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// Test that at the call site of any TraceRayInline call, a default error +// warning is emitted that indicates the ray query object has the +// RAY_FLAG_FORCE_OMM_2_STATE set, but doesn't have +// RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set + +RaytracingAccelerationStructure RTAS; +void main(RayDesc rayDesc : RAYDESC) : OUT { + // expected-note@+1 2 {{RayQueryFlags declared here}} + RayQuery<0> rayQuery; // implicitly, the second arg is 0. 
+ + // expected-error@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + rayQuery.TraceRayInline(RTAS, 1024, 2, rayDesc); + + // expected-error@+1{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + RayQuery<0, 1> rayQueryInvalid; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl new file mode 100644 index 0000000000..981788a688 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 -E RayQueryTests -verify %s +// RUN: %dxc -T vs_6_5 -E RayQueryTests2 -verify %s + +// validate 2nd template argument flags +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef RayQuery BadRayQuery; +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef RayQuery BadRayQuery2; From ebc8c5cd7eb8fc191b1fd8e76ccdd388a52c85bc Mon Sep 17 00:00:00 2001 From: Lumina Date: Mon, 17 Mar 2025 22:55:49 +0800 Subject: [PATCH 28/88] [NFC][Doc] Update HLSL to SPIR-V document (#7204) This is all about updating urls and tables in docs/SPIR-V.rst. I'm currently working on a demo & tutorial about how to work with Vulkan+HLSL+dxc toolchain so I made this PR. Thanks a lot for your contribution to the ecosystem, and glad to see your feedback! Signed-off-by: lumina37 --- docs/SPIR-V.rst | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index c30286e4e6..072a2fe9c1 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -282,7 +282,7 @@ Right now the following ```` are supported: Need ``SPV_KHR_device_group`` extension. * ``ViewportMaskNV``: The GLSL equivalent is ``gl_ViewportMask``. -Please see Vulkan spec. `14.6. Built-In Variables `_ +Please see Vulkan spec. `15.9. Built-In Variables `_ for detailed explanation of these builtins. Supported extensions @@ -446,7 +446,7 @@ environment (hence SPIR-V version) and SPIR-V extension control: ``-fspv-target-env=`` accepts a Vulkan target environment (see ``-help`` for supported values). If such an option is not given, the CodeGen defaults to ``vulkan1.0``. When targeting ``vulkan1.0``, trying to use features that are only -available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsics`_, +available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsic `_, will trigger a compiler error. If ``-fspv-extension=`` is not specified, the CodeGen will select suitable @@ -494,7 +494,7 @@ Specifically, we need to legalize the following HLSL source code patterns: Legalization transformations will not run unless the above patterns are encountered in the source code. 
-For more details, please see the `SPIR-V cookbook `_, +For more details, please see the `SPIR-V cookbook `_, which contains examples of what HLSL code patterns will be accepted and generate valid SPIR-V for Vulkan. @@ -561,7 +561,7 @@ So if you want to run loop unrolling additionally after the default optimization recipe, you can specify ``-Oconfig=-O,--loop-unroll``. For the whole list of accepted passes and details about each one, please see -``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. +``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. Validation ~~~~~~~~~~ @@ -640,7 +640,7 @@ HLSL Semantic HLSL semantic strings are by default not emitted into the SPIR-V binary module. If you need them, by specifying ``-fspv-reflect``, the compiler will use -the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ +the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ extension to emit them. HLSL User Types @@ -661,7 +661,7 @@ Counter buffers for RW/Append/Consume StructuredBuffer The association between a counter buffer and its main RW/Append/Consume StructuredBuffer is conveyed by ``OpDecorateId HLSLCounterBufferGOOGLE `` instruction from the -`SPV_GOOGLE_hlsl_funtionality1 `_ +`SPV_GOOGLE_hlsl_funtionality1 `_ extension. This information is by default missing; you need to specify ``-fspv-reflect`` to direct the compiler to emit them. @@ -911,7 +911,7 @@ For example, RWTexture2D Tex2; // Works like before -``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. +``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. The following table lists the mapping between ``FORMAT`` of ``[[vk::image_format("FORMAT")]]`` and its corresponding SPIR-V Image Format. @@ -994,7 +994,7 @@ Please see the following sections for the details of each type. As a summary: =========================== ================== ================================ ==================== ================= To know more about the Vulkan buffer types, please refer to the Vulkan spec -`13.1 Descriptor Types `_. +`14.1 Descriptor Types `_. Memory layout rules ~~~~~~~~~~~~~~~~~~~ @@ -1004,7 +1004,7 @@ right now: 1. Vector-relaxed OpenGL ``std140`` for uniform buffers and vector-relaxed OpenGL ``std430`` for storage buffers: these rules satisfy Vulkan `"Standard - Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, + Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, respectively. They are the default. 2. DirectX memory layout rules for uniform buffers and storage buffers: @@ -1027,7 +1027,7 @@ In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL alignment: 1. The alignment of a vector type is set to be the alignment of its element type -2. If the above causes an `improper straddle `_, +2. If the above causes an `improper straddle `_, the alignment will be set to 16 bytes. As an exmaple, for the following HLSL definition: @@ -1471,7 +1471,7 @@ Without hints from the developer, the compiler will try its best to map semantics to ``Location`` numbers. However, there is no single rule for this mapping; semantic strings should be handled case by case. 
-Firstly, under certain `SigPoints `_, +Firstly, under certain `SigPoints `_, some system-value (SV) semantic strings will be translated into SPIR-V ``BuiltIn`` decorations: @@ -1655,7 +1655,7 @@ some system-value (SV) semantic strings will be translated into SPIR-V | +-------------+----------------------------------------+-----------------------+-----------------------------+ | | MSOut | ``PrimitiveShadingRateKHR`` | N/A | ``FragmentShadingRate`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ -| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT `` | +| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ @@ -3596,8 +3596,8 @@ Mesh and Amplification Shaders | Amplification shaders corresponds to Task Shaders in Vulkan. | | Refer to following HLSL and SPIR-V specs for details: -| https://docs.microsoft.com/ -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc +| https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_mesh_shader.asciidoc | | This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan. @@ -3704,8 +3704,8 @@ Raytracing in Vulkan and SPIRV | SPIR-V codegen is currently supported for NVIDIA platforms via SPV_NV_ray_tracing extension or | on other platforms via provisional cross vendor SPV_KHR_ray_tracing extension. | SPIR-V specification for reference: -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_ray_tracing.asciidoc -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_tracing.asciidoc | Vulkan ray tracing samples: | https://developer.nvidia.com/rtx/raytracing/vkray @@ -3868,7 +3868,7 @@ Ray Query in SPIRV ~~~~~~~~~~~~~~~~~~ RayQuery SPIR-V codegen is currently supported via SPV_KHR_ray_query extension SPIR-V specification for reference: -https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_query.asciidoc +https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_query.asciidoc Object Type ~~~~~~~~~~~ @@ -4081,7 +4081,7 @@ This intrinsic funcion has the following signature: uint64_t ReadClock(in uint scope); -It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. +It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. One can use the predefined scopes in the ``vk`` namepsace to specify the scope argument. For example: @@ -4091,11 +4091,11 @@ For example: RawBufferLoad and RawBufferStore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Vulkan extension `VK_KHR_buffer_device_address `_ +The Vulkan extension `VK_KHR_buffer_device_address `_ supports getting the 64-bit address of a buffer and passing it to SPIR-V as a Uniform buffer. SPIR-V can use the address to load and store data without a descriptor. 
We add the following intrinsic functions to expose a subset of the -`VK_KHR_buffer_device_address `_ +`VK_KHR_buffer_device_address `_ and `SPV_KHR_physical_storage_buffer `_ functionality to HLSL: From 909c552458acdcfeddf077e790d419a15ca8a3b6 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 17 Mar 2025 08:57:54 -0700 Subject: [PATCH 29/88] Create new raw buffer load lowering function (#7144) Disentangles the raw, structured, and typed buffer lowering implementations into an isolated function. Alters the various places that lowering took place to call into the common function. The Load lowering takes place in a few phases now. The basic information about the load is gathered as part of the ResLoadHelper constructor. One variant extracts most of this information from a call instruction. The other sets a lot of things such as offsets more explicitly, usually for subscripted or matrix loads. The helper is used to assemble call instruction arguments appropriate for the call. The call is issued possibly repeatedly for raw buffers of types greater than 4 elements. The results are then packaged and converted from memory storage type into a vector of register types. When raw buffers use a templated load with a struct, they reuse the subscript path also used for subscripted structured buffers. Such loads with structs containing vectors or matrices will invoke the load lowering from within this recursive call that traverses GEPs and other users of the original call to set up correct offsets etc. This adapts that code to use the common load lowering that enables long vectors within structs to be correctly loaded. This requires the ability to override the type used by the resloadhelper explicitly, so a member is added to accommodate the matrices vector representation that doesn't match the types of the load call. This also requires removing the bufIdx and offset swapping that was done, confusingly throughout the TranslateStructBufSubscriptUser code to account for the fact that byte address buffers have to represent offsets using the main coord parameter in favor of passing the Resource Kind down such that the right parameter can receive the incrementation when necessary for longer types such as matrices. This is enabled also by adding ResKind appropriate offset calculation in the ResLoadHelper. ResLoadHelper also gets an opcode set based on the ResKind for both overloads in preparation for further expansion to different resource kinds. Adds filecheck, verify, and IR pass tests. 
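As a rough illustration of the kind of source that now exercises the shared
path (a sketch with made-up names and offsets, assuming a target that
accepts 7-wide vectors; it is not copied from the new tests):

    ByteAddressBuffer InBuf;
    RWByteAddressBuffer OutBuf;

    [numthreads(8, 1, 1)]
    void main(uint ix : SV_GroupIndex) {
      // With this change, a 7-element load from a raw buffer is split into
      // two rawBufferLoad calls (a 4-element chunk followed by a 3-element
      // chunk), and the pieces are reassembled into one 7-wide register
      // vector.
      vector<float, 7> v = InBuf.Load<vector<float, 7> >(ix * 28);
      OutBuf.Store<vector<float, 7> >(ix * 28, v);
    }

The templated load above, like subscripted structured-buffer loads, is
meant to funnel into the common lowering, so the chunking and the
memory-to-register conversion happen in one place.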
Lays groundwork for #7118 --- lib/HLSL/HLOperationLower.cpp | 765 ++++++++---------- .../intrinsics/buffer-agg-load-stores.hlsl | 36 +- .../buffer-load-stores-scalars.hlsl | 162 ++++ .../hlsl/intrinsics/buffer-load-stores.hlsl | 189 ++++- .../hlsl/intrinsics/buffer-load.hlsl | 152 ++++ .../hlsl/intrinsics/buffer-load.ll | 404 +++++++++ .../hlsl/intrinsics/buffer-typed-load.hlsl | 112 +++ .../hlsl/intrinsics/buffer-typed-load.ll | 346 ++++++++ 8 files changed, 1722 insertions(+), 444 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index bc293357d6..9c3ad76b92 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -2985,23 +2985,6 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, return retVal; } -static Value *ScalarizeElements(Type *RetTy, ArrayRef Elts, - IRBuilder<> &Builder) { - // Extract value part. - Value *retVal = llvm::UndefValue::get(RetTy); - if (RetTy->isVectorTy()) { - unsigned vecSize = RetTy->getVectorNumElements(); - DXASSERT(vecSize <= Elts.size(), "vector size mismatch"); - for (unsigned i = 0; i < vecSize; i++) { - Value *retComp = Elts[i]; - retVal = Builder.CreateInsertElement(retVal, retComp, i); - } - } else { - retVal = Elts[0]; - } - return retVal; -} - void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, hlsl::OP *hlslOp) { if (status && !isa(status)) { @@ -3941,14 +3924,36 @@ TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // Load/Store intrinsics. +OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) { + switch (RK) { + case DxilResource::Kind::RawBuffer: + case DxilResource::Kind::StructuredBuffer: + return OP::OpCode::RawBufferLoad; + case DxilResource::Kind::TypedBuffer: + return OP::OpCode::BufferLoad; + case DxilResource::Kind::Invalid: + DXASSERT(0, "invalid resource kind"); + break; + default: + return OP::OpCode::TextureLoad; + } + return OP::OpCode::TextureLoad; +} + struct ResLoadHelper { + // Default constructor uses CI load intrinsic call + // to get the retval and various location indicators. ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, - Value *h, IntrinsicOp IOP, bool bForSubscript = false); - // For double subscript. - ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip) - : opcode(OP::OpCode::TextureLoad), - intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst), - addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {} + Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr); + // Alternative constructor explicitly sets the index. + // Used for some subscript operators that feed the generic HL call inst + // into a load op and by the matrixload call instruction. 
+ ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, + Value *Offset, Value *mip = nullptr) + : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), + addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + opcode = LoadOpFromResKind(RK); + } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; unsigned dxilMajor; @@ -3961,122 +3966,85 @@ struct ResLoadHelper { Value *mipLevel; }; +// Uses CI arguments to determine the index, offset, and mipLevel also depending +// on the RK/RC resource kind and class, which determine the opcode. +// Handle and IOP are set explicitly. +// For typed buffer loads, the call instruction feeds into a load +// represented by TyBufSubLoad which determines the instruction to replace. +// Otherwise, CI is replaced. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, Value *hdl, - IntrinsicOp IOP, bool bForSubscript) + IntrinsicOp IOP, LoadInst *TyBufSubLoad) : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) { - switch (RK) { - case DxilResource::Kind::RawBuffer: - case DxilResource::Kind::StructuredBuffer: - opcode = OP::OpCode::RawBufferLoad; - break; - case DxilResource::Kind::TypedBuffer: - opcode = OP::OpCode::BufferLoad; - break; - case DxilResource::Kind::Invalid: - DXASSERT(0, "invalid resource kind"); - break; - default: - opcode = OP::OpCode::TextureLoad; - break; - } - retVal = CI; + opcode = LoadOpFromResKind(RK); + bool bForSubscript = false; + if (TyBufSubLoad) { + bForSubscript = true; + retVal = TyBufSubLoad; + } else + retVal = CI; const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx; addr = CI->getArgOperand(kAddrIdx); unsigned argc = CI->getNumArgOperands(); + Type *i32Ty = Type::getInt32Ty(CI->getContext()); + unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; + unsigned OffsetIdx = HLOperandIndex::kInvalidIdx; if (opcode == OP::OpCode::TextureLoad) { - // mip at last channel - unsigned coordSize = DxilResource::GetNumCoords(RK); - - if (RC == DxilResourceBase::Class::SRV) { - if (bForSubscript) { - // Use 0 when access by []. - mipLevel = IRBuilder<>(CI).getInt32(0); - } else { - if (coordSize == 1 && !addr->getType()->isVectorTy()) { - // Use addr when access by Load. - mipLevel = addr; - } else { - mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize); - } - } - } else { - // Set mip level to undef for UAV. - mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext())); - } - - if (RC == DxilResourceBase::Class::SRV) { - unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx; - unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx; - if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) { - offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx; - statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + bool IsMS = (RK == DxilResource::Kind::Texture2DMS || + RK == DxilResource::Kind::Texture2DMSArray); + // Set mip and status index. + offset = UndefValue::get(i32Ty); + if (IsMS) { + // Retrieve appropriate MS parameters. + StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + // MS textures keep the sample param (mipLevel) regardless of writability. 
+ if (bForSubscript) + mipLevel = ConstantInt::get(i32Ty, 0); + else mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - } - - if (argc > offsetIdx) - offset = CI->getArgOperand(offsetIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - } else if (RC == DxilResourceBase::Class::UAV && - (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray)) { - unsigned statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; - mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - + } else if (RC == DxilResourceBase::Class::UAV) { + // DXIL requires that non-MS UAV accesses set miplevel to undef. + mipLevel = UndefValue::get(i32Ty); + StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; } else { - const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; - - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); + // Non-MS SRV case. + StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx; + if (bForSubscript) + // Having no miplevel param, single subscripted SRVs default to 0. + mipLevel = ConstantInt::get(i32Ty, 0); + else + // Mip is stored at the last channel of the coordinate vector. + mipLevel = IRBuilder<>(CI).CreateExtractElement( + addr, DxilResource::GetNumCoords(RK)); } - } else { - const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); - } + if (RC == DxilResourceBase::Class::SRV) + OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx + : HLOperandIndex::kTexLoadOffsetOpIdx; + } + + // Set offset. + if (DXIL::IsStructuredBuffer(RK)) + // Structured buffers receive no exterior offset in this constructor, + // but may need to increment it later. + offset = ConstantInt::get(i32Ty, 0U); + else if (argc > OffsetIdx) + // Textures may set the offset from an explicit argument. + offset = CI->getArgOperand(OffsetIdx); + else + // All other cases use undef. + offset = UndefValue::get(i32Ty); + + // Retrieve status value if provided. 
+ if (argc > StatusIdx) + status = CI->getArgOperand(StatusIdx); } void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL); -// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi } -void Make64bitResultForLoad(Type *EltTy, ArrayRef resultElts32, - unsigned size, MutableArrayRef resultElts, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - if (EltTy == doubleTy) { - Function *makeDouble = - hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy); - Value *makeDoubleOpArg = - Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); - resultElts[i] = V; - } - } else { - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - lo = Builder.CreateZExt(lo, i64Ty); - hi = Builder.CreateZExt(hi, i64Ty); - hi = Builder.CreateShl(hi, 32); - resultElts[i] = Builder.CreateOr(lo, hi); - } - } -} - static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) { unsigned mask = 0; @@ -4108,183 +4076,194 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment); -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned VecElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy = false); - -void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK, - IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) { - - Type *Ty = helper.retVal->getType(); - if (Ty->isPointerTy()) { - DXASSERT(!DxilResource::IsAnyTexture(RK), - "Textures should not be treated as structured buffers."); - TranslateStructBufSubscript(cast(helper.retVal), helper.handle, - helper.status, OP, RK, DL); - return; - } - +// Sets up arguments for buffer load call. +static SmallVector GetBufLoadArgs(ResLoadHelper helper, + HLResource::Kind RK, + IRBuilder<> Builder, Type *EltTy, + unsigned LdSize) { OP::OpCode opcode = helper.opcode; + llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); - Type *i32Ty = Builder.getInt32Ty(); - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - Type *EltTy = Ty->getScalarType(); - unsigned numComponents = 1; - if (Ty->isVectorTy()) { - numComponents = Ty->getVectorNumElements(); - } - - if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) { - std::vector bufLds; - const bool isBool = EltTy->isIntegerTy(1); + unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U; + alignment = std::min(alignment, LdSize); + Constant *alignmentVal = Builder.getInt32(alignment); - // Bool are represented as i32 in memory - Type *MemReprTy = isBool ? 
Builder.getInt32Ty() : EltTy;
-    bool isScalarTy = !Ty->isVectorTy();
-
-    Value *retValNew = nullptr;
-    if (DXIL::IsStructuredBuffer(RK)) {
-      retValNew = TranslateRawBufVecLd(
-          MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
-          helper.addr, OP->GetU32Const(0), DL, bufLds,
-          /*baseAlign (in bytes)*/ 8, isScalarTy);
-    } else {
-      retValNew =
-          TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle,
-                               OP, helper.status, nullptr, helper.addr, DL,
-                               bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy);
-    }
+  // Assemble args specific to the type bab/struct/typed:
+  // - Typed needs to handle the possibility of vector coords
+  // - Raws need to calculate alignment and mask values.
+  SmallVector Args;
+  Args.emplace_back(opArg);         // opcode @0.
+  Args.emplace_back(helper.handle); // Resource handle @1
-    DXASSERT_NOMSG(!bufLds.empty());
-    dxilutil::MigrateDebugValue(helper.retVal, bufLds.front());
+  // Set offsets appropriate for the load operation.
+  bool isVectorAddr = helper.addr->getType()->isVectorTy();
+  if (opcode == OP::OpCode::TextureLoad) {
+    llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty());
-    if (isBool) {
-      // Convert result back to register representation.
-      retValNew = Builder.CreateICmpNE(
-          retValNew, Constant::getNullValue(retValNew->getType()));
+    // Set mip level or sample for MS textures @2.
+    Args.emplace_back(helper.mipLevel);
+    // Set texture coords according to resource kind @3-5
+    // Coords unused by the resource kind are undefs.
+    unsigned coordSize = DxilResource::GetNumCoords(RK);
+    for (unsigned i = 0; i < 3; i++)
+      if (i < coordSize)
+        Args.emplace_back(isVectorAddr
+                              ? Builder.CreateExtractElement(helper.addr, i)
+                              : helper.addr);
+      else
+        Args.emplace_back(undefI);
+
+    // Set texture offsets according to resource kind @6-8
+    // Offsets unused by the resource kind are undefs.
+    unsigned offsetSize = DxilResource::GetNumOffsets(RK);
+    if (!helper.offset || isa(helper.offset))
+      offsetSize = 0;
+    for (unsigned i = 0; i < 3; i++)
+      if (i < offsetSize)
+        Args.emplace_back(Builder.CreateExtractElement(helper.offset, i));
+      else
+        Args.emplace_back(undefI);
+  } else {
+    // If not TextureLoad, it could be a typed or raw buffer load.
+    // They have mostly similar arguments.
+    DXASSERT(opcode == OP::OpCode::RawBufferLoad ||
+                 opcode == OP::OpCode::BufferLoad,
+             "Wrong opcode in get load args");
+    Args.emplace_back(
+        isVectorAddr ? Builder.CreateExtractElement(helper.addr, (uint64_t)0)
+                     : helper.addr);
+    Args.emplace_back(helper.offset);
+    if (opcode == OP::OpCode::RawBufferLoad) {
+      // Unlike typed buffer load, raw buffer load has mask and alignment.
+      Args.emplace_back(nullptr);      // Mask will be added later @4.
+      Args.emplace_back(alignmentVal); // alignment @5.
     }
-
-    helper.retVal->replaceAllUsesWith(retValNew);
-    helper.retVal = retValNew;
-    return;
-  }
-
-  bool isTyped = opcode == OP::OpCode::TextureLoad ||
-                 RK == DxilResource::Kind::TypedBuffer;
-  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
-  if (is64 && isTyped) {
-    EltTy = i32Ty;
-  }
-  bool isBool = EltTy->isIntegerTy(1);
-  if (isBool) {
-    // Value will be loaded in its memory representation. 
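// For reference, GetBufLoadArgs fills the dx.op.textureLoad operands in fixed
// slots; the enum below is an illustrative restatement of that layout (not
// DXC code), with unused coordinate and offset slots receiving undef.
enum TextureLoadArgSlot : unsigned {
  kTexLdOpcode = 0,      // @0 opcode constant
  kTexLdHandle = 1,      // @1 resource handle
  kTexLdMipOrSample = 2, // @2 mip level, or sample index for MS textures
  kTexLdCoord0 = 3,      // @3-5 coordinates
  kTexLdCoord1 = 4,
  kTexLdCoord2 = 5,
  kTexLdOffset0 = 6,     // @6-8 offsets
  kTexLdOffset1 = 7,
  kTexLdOffset2 = 8,
  kTexLdNumArgs = 9
};
static_assert(kTexLdOffset0 == kTexLdCoord2 + 1, "offsets follow the coords");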
- EltTy = i32Ty; - if (Ty->isVectorTy()) - Ty = VectorType::get(EltTy, numComponents); } + return Args; +} - Function *F = OP->GetOpFunc(opcode, EltTy); - llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); - - llvm::Value *undefI = llvm::UndefValue::get(i32Ty); +// Emits as many calls as needed to load the full vector +// Performs any needed extractions and conversions of the results. +Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, + IRBuilder<> &Builder, hlsl::OP *OP, + const DataLayout &DL) { + OP::OpCode opcode = helper.opcode; + Type *Ty = helper.retVal->getType(); - SmallVector loadArgs; - loadArgs.emplace_back(opArg); // opcode - loadArgs.emplace_back(helper.handle); // resource handle + unsigned NumComponents = 1; + if (Ty->isVectorTy()) + NumComponents = Ty->getVectorNumElements(); - if (opcode == OP::OpCode::TextureLoad) { - // set mip level - loadArgs.emplace_back(helper.mipLevel); - } + const bool isTyped = DXIL::IsTyped(RK); + Type *EltTy = Ty->getScalarType(); + const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()); + const bool isBool = EltTy->isIntegerTy(1); + // Values will be loaded in memory representations. + if (isBool || (is64 && isTyped)) + EltTy = Builder.getInt32Ty(); - if (opcode == OP::OpCode::TextureLoad) { - // texture coord - unsigned coordSize = DxilResource::GetNumCoords(RK); - bool isVectorAddr = helper.addr->getType()->isVectorTy(); - for (unsigned i = 0; i < 3; i++) { - if (i < coordSize) { - loadArgs.emplace_back(isVectorAddr - ? Builder.CreateExtractElement(helper.addr, i) - : helper.addr); - } else - loadArgs.emplace_back(undefI); + // 64-bit types are stored as int32 pairs in typed buffers. + if (is64 && isTyped) { + DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); + NumComponents *= 2; + } + + unsigned LdSize = DL.getTypeAllocSize(EltTy); + + SmallVector Elts(NumComponents); + + SmallVector Args = + GetBufLoadArgs(helper, RK, Builder, EltTy, LdSize); + + // Keep track of the first load for debug info migration. + Value *FirstLd = nullptr; + + unsigned OffsetIdx = 0; + if (RK == DxilResource::Kind::RawBuffer) + // Raw buffers can't use offset param. Add to coord index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + // Create calls to function object. + // Typed buffer loads are limited to one load of up to 4 32-bit values. + // Raw buffer loads might need multiple loads in chunks of 4. + for (unsigned i = 0; i < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned chunkSize = std::min(NumComponents - i, 4U); + + // Assign mask for raw buffer loads. + if (opcode == OP::OpCode::RawBufferLoad) { + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + GetRawBufferMaskForETy(EltTy, chunkSize, OP); + // If we've loaded a chunk already, update offset to next chunk. 
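// The chunking above can be summarized with a small standalone model (plain
// C++, illustrative only): a RawBufferLoad returns at most four components,
// so an N-component value becomes ceil(N/4) calls, each with a low-bit mask
// for the components it requests and an offset bumped by four elements per
// subsequent call.
#include <algorithm>
#include <cstdint>
#include <vector>

struct LoadChunkPlan {
  unsigned componentCount; // 1-4 components returned by this call
  uint8_t mask;            // low bits set, one per requested component
  unsigned byteOffset;     // bytes added to the caller-provided offset
};

// "eltSize" plays the role of LdSize above (per-component size in bytes).
inline std::vector<LoadChunkPlan> PlanRawBufferLoads(unsigned numComponents,
                                                     unsigned eltSize) {
  std::vector<LoadChunkPlan> plan;
  unsigned byteOffset = 0;
  for (unsigned i = 0; i < numComponents; i += 4) {
    unsigned count = std::min(numComponents - i, 4u);
    plan.push_back(
        {count, static_cast<uint8_t>((1u << count) - 1u), byteOffset});
    byteOffset += 4 * eltSize; // the next chunk starts four elements later
  }
  return plan;
}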
+ if (FirstLd != nullptr && opcode == OP::OpCode::RawBufferLoad) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); } - } else { - if (helper.addr->getType()->isVectorTy()) { - Value *scalarOffset = - Builder.CreateExtractElement(helper.addr, (uint64_t)0); - - // TODO: calculate the real address based on opcode - loadArgs.emplace_back(scalarOffset); // offset - } else { - // TODO: calculate the real address based on opcode - - loadArgs.emplace_back(helper.addr); // offset - } - } - // offset 0 - if (opcode == OP::OpCode::TextureLoad) { - if (helper.offset && !isa(helper.offset)) { - unsigned offsetSize = DxilResource::GetNumOffsets(RK); - for (unsigned i = 0; i < 3; i++) { - if (i < offsetSize) - loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i)); - else - loadArgs.emplace_back(undefI); + Function *F = OP->GetOpFunc(opcode, EltTy); + Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); + + // Extract elements from returned ResRet. + for (unsigned j = 0; j < chunkSize; j++, i++) + Elts[i] = Builder.CreateExtractValue(Ld, j); + + // Update status. + UpdateStatus(Ld, helper.status, Builder, OP); + + if (!FirstLd) + FirstLd = Ld; + } + DXASSERT(FirstLd, "No loads created by TranslateBufLoad"); + + // Convert loaded 32-bit integers to intended 64-bit type representation. + if (isTyped) { + Type *RegEltTy = Ty->getScalarType(); + if (RegEltTy->isDoubleTy()) { + Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy); + Value *makeDoubleOpArg = + Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); + NumComponents /= 2; // Convert back to number of doubles. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); } - } else { - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); + EltTy = RegEltTy; + } else if (RegEltTy->isIntegerTy(64)) { + NumComponents /= 2; // Convert back to number of int64s. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + lo = Builder.CreateZExt(lo, RegEltTy); + hi = Builder.CreateZExt(hi, RegEltTy); + hi = Builder.CreateShl(hi, 32); + Elts[i] = Builder.CreateOr(lo, hi); + } + EltTy = RegEltTy; } } - // Offset 1 - if (RK == DxilResource::Kind::TypedBuffer) { - loadArgs.emplace_back(undefI); - } - - Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode)); - dxilutil::MigrateDebugValue(helper.retVal, ResRet); - + // Package elements into a vector. Value *retValNew = nullptr; - if (!is64 || !isTyped) { - retValNew = ScalarizeResRet(Ty, ResRet, Builder); + if (!Ty->isVectorTy()) { + retValNew = Elts[0]; } else { - unsigned size = numComponents; - DXASSERT(size <= 2, "typed buffer only allow 4 dwords"); - EltTy = Ty->getScalarType(); - Value *Elts[2]; - - Make64bitResultForLoad(Ty->getScalarType(), - { - Builder.CreateExtractValue(ResRet, 0), - Builder.CreateExtractValue(ResRet, 1), - Builder.CreateExtractValue(ResRet, 2), - Builder.CreateExtractValue(ResRet, 3), - }, - size, Elts, OP, Builder); - - retValNew = ScalarizeElements(Ty, Elts, Builder); + retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned i = 0; i < NumComponents; i++) + retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i); } - if (isBool) { - // Convert result back to register representation. 
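// The memory-to-register conversions performed here have simple scalar
// equivalents, sketched below in plain C++ (illustrative only; MakeDouble is
// modeled with a bit copy rather than the DXIL op itself):
#include <cstdint>
#include <cstring>

// Typed-buffer loads return 64-bit values as lo/hi i32 pairs.
inline uint64_t MakeUInt64(uint32_t lo, uint32_t hi) {
  return static_cast<uint64_t>(lo) | (static_cast<uint64_t>(hi) << 32);
}

inline double MakeDoubleFromHalves(uint32_t lo, uint32_t hi) {
  uint64_t bits = MakeUInt64(lo, hi);
  double d;
  std::memcpy(&d, &bits, sizeof d); // reinterpret the assembled bit pattern
  return d;
}

// Bools are stored as i32 in memory and compared against zero on load.
inline bool BoolFromMemory(uint32_t stored) { return stored != 0; }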
+ // Convert loaded int32 bool results to i1 register representation. + if (isBool) retValNew = Builder.CreateICmpNE( retValNew, Constant::getNullValue(retValNew->getType())); - } - // replace helper.retVal->replaceAllUsesWith(retValNew); - // Save new ret val. helper.retVal = retValNew; - // get status - UpdateStatus(ResRet, helper.status, Builder, OP); + + return FirstLd; } Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -4292,6 +4271,7 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; + DataLayout &DL = helper.dataLayout; Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); IRBuilder<> Builder(CI); @@ -4299,9 +4279,19 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, DXIL::ResourceClass RC = pObjHelper->GetRC(handle); DXIL::ResourceKind RK = pObjHelper->GetRK(handle); - ResLoadHelper loadHelper(CI, RK, RC, handle, IOP); - TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout); - // CI is replaced in TranslateLoad. + ResLoadHelper ldHelper(CI, RK, RC, handle, IOP); + Type *Ty = CI->getType(); + Value *Ld = nullptr; + if (Ty->isPointerTy()) { + DXASSERT(!DxilResource::IsAnyTexture(RK), + "Textures should not be treated as structured buffers."); + TranslateStructBufSubscript(cast(ldHelper.retVal), handle, + ldHelper.status, hlslOP, RK, DL); + } else { + Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); + dxilutil::MigrateDebugValue(CI, Ld); + } + // CI is replaced by above translation calls.. return nullptr; } @@ -7887,69 +7877,21 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset, Builder.CreateCall(dxilF, Args); } -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned ElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy) { - - unsigned EltSize = DL.getTypeAllocSize(VecEltTy); - unsigned alignment = std::min(baseAlign, EltSize); - Constant *alignmentVal = OP->GetI32Const(alignment); - - if (baseOffset == nullptr) { - baseOffset = OP->GetU32Const(0); - } - - std::vector elts(ElemCount); - unsigned rest = (ElemCount % 4); - for (unsigned i = 0; i < ElemCount - rest; i += 4) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, 4, alignmentVal); - bufLds.emplace_back(bufLd); - elts[i] = ResultElts[0]; - elts[i + 1] = ResultElts[1]; - elts[i + 2] = ResultElts[2]; - elts[i + 3] = ResultElts[3]; - - baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize)); - } - - if (rest) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, rest, alignmentVal); - bufLds.emplace_back(bufLd); - for (unsigned i = 0; i < rest; i++) - elts[ElemCount - rest + i] = ResultElts[i]; - } - - // If the expected return type is scalar then skip building a vector - if (isScalarTy) { - return elts[0]; - } - - Value *Vec = HLMatrixLower::BuildVector(VecEltTy, elts, Builder); - return Vec; -} - -Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder, - Value *handle, hlsl::OP *OP, Value *status, - Value *bufIdx, Value *baseOffset, +Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, + Value *handle, HLResource::Kind RK, hlsl::OP 
*OP, + Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { + + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); +#ifndef NDEBUG + Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); + Type *matType = ptr->getType()->getPointerElementType(); HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - unsigned matSize = MatTy.getNumElements(); - std::vector bufLds; - Value *Vec = - TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx, - baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8); - Vec = MatTy.emitLoweredMemToReg(Vec, Builder); - return Vec; + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == + helper.retVal->getType(), + "helper type should match vectorized matrix"); +#endif + return TranslateBufLoad(helper, RK, Builder, OP, DL); } void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, @@ -7991,9 +7933,9 @@ void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, } } -void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, - Value *status, Value *bufIdx, Value *baseOffset, - const DataLayout &DL) { +void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, + hlsl::OP *OP, Value *status, Value *bufIdx, + Value *baseOffset, const DataLayout &DL) { IRBuilder<> Builder(CI); HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); unsigned opcode = GetHLOpcode(CI); @@ -8006,13 +7948,10 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, // orientation. switch (matOp) { case HLMatLoadStoreOpcode::RowMatLoad: - case HLMatLoadStoreOpcode::ColMatLoad: { - Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); - Value *NewLd = TranslateStructBufMatLd( - ptr->getType()->getPointerElementType(), Builder, handle, OP, status, - bufIdx, baseOffset, DL); - CI->replaceAllUsesWith(NewLd); - } break; + case HLMatLoadStoreOpcode::ColMatLoad: + TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx, + baseOffset, DL); + break; case HLMatLoadStoreOpcode::RowMatStore: case HLMatLoadStoreOpcode::ColMatStore: { Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); @@ -8283,57 +8222,47 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } userCall->eraseFromParent(); } else if (group == HLOpcodeGroup::HLMatLoadStore) - TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx, + // Load/Store matrix within a struct + TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx, baseOffset, DL); else if (group == HLOpcodeGroup::HLSubscript) { + // Subscript of matrix within a struct TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx, baseOffset, status, OP, DL); } - } else if (isa(user) || isa(user)) { - LoadInst *LdInst = dyn_cast(user); - StoreInst *StInst = dyn_cast(user); - - Type *Ty = isa(user) ? LdInst->getType() - : StInst->getValueOperand()->getType(); + } else if (LoadInst *LdInst = dyn_cast(user)) { + // Load of scalar/vector within a struct or structured raw load. + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + TranslateBufLoad(helper, ResKind, Builder, OP, DL); + + LdInst->eraseFromParent(); + } else if (StoreInst *StInst = dyn_cast(user)) { + // Store of scalar/vector within a struct or structured raw store. 
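// The component mask computed below follows a simple rule, restated here as
// an illustrative standalone helper (plain C++, not DXC code): one bit per
// written component, with scalars writing only X.
#include <cstdint>

inline uint8_t StoreComponentMask(unsigned numComponents /* 1-4 */) {
  uint8_t mask = 0;
  for (unsigned i = 0; i < numComponents; ++i)
    mask |= static_cast<uint8_t>(1u << i); // X=1, Y=2, Z=4, W=8
  return mask; // scalar -> 0x1 (kCompMask_X), four components -> 0xF
}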
+ Type *Ty = StInst->getValueOperand()->getType(); Type *pOverloadTy = Ty->getScalarType(); - Value *Offset = baseOffset; + Value *offset = baseOffset; - if (LdInst) { - unsigned NumComponents = 0; - if (VectorType *VTy = dyn_cast(Ty)) - NumComponents = VTy->getNumElements(); - else - NumComponents = 1; - Value *ResultElts[4]; - Constant *Alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateRawBufLd(handle, bufIdx, Offset, status, pOverloadTy, ResultElts, - OP, Builder, NumComponents, Alignment); - Value *NewLd = ScalarizeElements(Ty, ResultElts, Builder); - LdInst->replaceAllUsesWith(NewLd); - } else { - Value *val = StInst->getValueOperand(); - Value *undefVal = llvm::UndefValue::get(pOverloadTy); - Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; - uint8_t mask = 0; - if (Ty->isVectorTy()) { - unsigned vectorNumElements = Ty->getVectorNumElements(); - DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); - assert(vectorNumElements <= 4); - for (unsigned i = 0; i < vectorNumElements; i++) { - vals[i] = Builder.CreateExtractElement(val, i); - mask |= (1 << i); - } - } else { - vals[0] = val; - mask = DXIL::kCompMask_X; + Value *val = StInst->getValueOperand(); + Value *undefVal = llvm::UndefValue::get(pOverloadTy); + Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; + uint8_t mask = 0; + if (Ty->isVectorTy()) { + unsigned vectorNumElements = Ty->getVectorNumElements(); + DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); + assert(vectorNumElements <= 4); + for (unsigned i = 0; i < vectorNumElements; i++) { + vals[i] = Builder.CreateExtractElement(val, i); + mask |= (1 << i); } - Constant *alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateStructBufSt(handle, bufIdx, Offset, pOverloadTy, OP, Builder, - vals, mask, alignment); + } else { + vals[0] = val; + mask = DXIL::kCompMask_X; } - user->eraseFromParent(); + Constant *alignment = + OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); + GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder, vals, + mask, alignment); + StInst->eraseFromParent(); } else if (BitCastInst *BCI = dyn_cast(user)) { // Recurse users for (auto U = BCI->user_begin(); U != BCI->user_end();) { @@ -8368,13 +8297,18 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()), "else bitness is wrong"); - offset = Builder.CreateAdd(offset, baseOffset); + // No offset into element for Raw buffers; byte offset is in bufIdx. + if (DXIL::IsRawBuffer(ResKind)) + bufIdx = Builder.CreateAdd(offset, bufIdx); + else + baseOffset = Builder.CreateAdd(offset, baseOffset); for (auto U = GEP->user_begin(); U != GEP->user_end();) { Value *GEPUser = *(U++); TranslateStructBufSubscriptUser(cast(GEPUser), handle, - ResKind, bufIdx, offset, status, OP, DL); + ResKind, bufIdx, baseOffset, status, OP, + DL); } // delete the inst GEP->eraseFromParent(); @@ -8388,13 +8322,12 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); Value *bufIdx = nullptr; Value *offset = nullptr; - if (ResKind == HLResource::Kind::RawBuffer) { - offset = subscriptIndex; - } else { + bufIdx = subscriptIndex; + if (ResKind == HLResource::Kind::RawBuffer) + offset = UndefValue::get(Type::getInt32Ty(CI->getContext())); + else // StructuredBuffer, TypedBuffer, etc. 
- bufIdx = subscriptIndex; offset = OP->GetU32Const(0); - } for (auto U = CI->user_begin(); U != CI->user_end();) { Value *user = *(U++); @@ -8408,19 +8341,14 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, // HLSubscript. namespace { -Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK, - DXIL::ResourceClass RC, Value *handle, - LoadInst *ldInst, IRBuilder<> &Builder, - hlsl::OP *hlslOP, const DataLayout &DL) { - ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, - /*bForSubscript*/ true); - // Default sampleIdx for 2DMS textures. - if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) - ldHelper.mipLevel = hlslOP->GetU32Const(0); - // use ldInst as retVal - ldHelper.retVal = ldInst; - TranslateLoad(ldHelper, RK, Builder, hlslOP, DL); +Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK, + DXIL::ResourceClass RC, Value *handle, + LoadInst *ldInst, IRBuilder<> &Builder, + hlsl::OP *hlslOP, const DataLayout &DL) { + // The arguments to the call instruction are used to determine the access, + // the return value and type come from the load instruction. + ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); // delete the ld ldInst->eraseFromParent(); return ldHelper.retVal; @@ -8463,9 +8391,9 @@ Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx, return VecVal; } -void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); hlsl::OP *hlslOP = &helper.hlslOP; @@ -8481,8 +8409,8 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, Instruction *I = cast(user); IRBuilder<> Builder(I); if (LoadInst *ldInst = dyn_cast(user)) { - TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, - helper.dataLayout); + TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, + helper.dataLayout); } else if (StoreInst *stInst = dyn_cast(user)) { Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, @@ -8504,7 +8432,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate Ld. LoadInst *tmpLd = StBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout); // Update vector. 
ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx, @@ -8524,7 +8452,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate tmp vector load with vector type & translate it LoadInst *tmpLd = LdBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout); // get the single element @@ -8697,8 +8625,9 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, DXASSERT(CI->hasOneUse(), "subscript should only have one use"); IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { - ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel); - TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); + Value *Offset = UndefValue::get(Builder.getInt32Ty()); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); ldInst->eraseFromParent(); } else { StoreInst *stInst = cast(*U); @@ -8736,7 +8665,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, helper.dataLayout); else - TranslateDefaultSubscript(CI, helper, pObjHelper, Translated); + TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated); return; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index e6246845b3..9f7a487a05 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -3,14 +3,34 @@ // RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s + +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s 
--check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT + // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s @@ -26,8 +46,6 @@ // for different aggregate buffer types and indices. /////////////////////////////////////////////////////////////////////// - - // CHECK: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]], #if !defined(ATY) @@ -68,6 +86,16 @@ struct OffVector { } }; +template +struct Matrix { + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + ByteAddressBuffer RoByBuf : register(t1); RWByteAddressBuffer RwByBuf : register(u1); @@ -156,6 +184,8 @@ void main(uint ix[2] : IX) { // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -163,6 +193,8 @@ void main(uint ix[2] : IX) { TYPE stbElt1 SS = RwStBuf.Load(ix[0]); // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl new file mode 100644 index 0000000000..03735cb968 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl @@ -0,0 +1,162 @@ +// RUN: %dxc -DTYPE=float -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// Confirm that 6.9 doesn't use vector loads for scalars and vec1s +// RUN: %dxc -DTYPE=float -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_9 %s | FileCheck %s 
--check-prefixes=CHECK,I1
+// RUN: %dxc -DTYPE=uint64_t -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64
+// RUN: %dxc -DTYPE=double -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64
+
+// RUN: %dxc -DTYPE=float1 -T vs_6_9 %s | FileCheck %s
+// RUN: %dxc -DTYPE=bool1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1
+// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64
+// RUN: %dxc -DTYPE=double1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64
+
+///////////////////////////////////////////////////////////////////////
+// Test codegen for various load and store operations and conversions
+// for different scalar buffer types and confirm that the proper
+// loads, stores, and conversion operations take place.
+///////////////////////////////////////////////////////////////////////
+
+
+// These -DAGs must match the same line. That is the only reason for the -DAG.
+// The first match will assign [[TY]] to the native type.
+// For most runs, the second match will assign [[TY32]] to the same thing.
+// For 64-bit types, the memory representation is i32 and a separate variable is needed.
+// For these cases, there is another line that will always match i32.
+// This line will also force the previous -DAGs to match the same line, since the most
+// this shader can produce is two ResRet types.
+// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]],
+// CHECK-DAG: %dx.types.ResRet.[[TY32:[a-z][0-9][0-9]]] = type { [[TYPE]],
+// I64: %dx.types.ResRet.[[TY32:i32]]
+// F64: %dx.types.ResRet.[[TY32:i32]]
+
+  ByteAddressBuffer RoByBuf : register(t1);
+RWByteAddressBuffer RwByBuf : register(u1);
+
+  StructuredBuffer< TYPE > RoStBuf : register(t2);
+RWStructuredBuffer< TYPE > RwStBuf : register(u2);
+
+  Buffer< TYPE > RoTyBuf : register(t3);
+RWBuffer< TYPE > RwTyBuf : register(u3);
+
+ConsumeStructuredBuffer< TYPE > CnStBuf : register(u4);
+AppendStructuredBuffer< TYPE > ApStBuf : register(u5);
+
+void main(uint ix[2] : IX) {
+  // ByteAddressBuffer Tests
+
+  // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false)
+  // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false)
+
+  // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false)
+  // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false)
+
+  // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false)
+  // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false)
+
+  // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false)
+  // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false)
+
+  // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4,
+
+  // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]]
+  // CHECK: call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt4 = RoStBuf[ix[1]]; + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); + + // TypedBuffer Tests + // CHECK: [[ANHDLRWTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt1 = RwTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw 
i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt2 = RwTyBuf[ix[1]]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt3 = RoTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt4 = RoTyBuf[ix[1]]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // I64: lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index ea44fef604..8dcf5ead1c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -27,13 +27,20 @@ RWByteAddressBuffer RwByBuf : register(u1); StructuredBuffer< TYPE > RoStBuf : register(t2); RWStructuredBuffer< TYPE > RwStBuf : register(u2); - Buffer< TYPE > RoTyBuf : register(t3); -RWBuffer< TYPE > RwTyBuf : register(u3); +ConsumeStructuredBuffer CnStBuf : register(u3); +AppendStructuredBuffer ApStBuf : register(u4); -ConsumeStructuredBuffer CnStBuf : register(u4); -AppendStructuredBuffer ApStBuf : register(u5); + Buffer< TYPE > RoTyBuf : register(t5); +RWBuffer< TYPE > RwTyBuf : register(u5); -void main(uint ix[2] : IX) { + Texture1D< TYPE > RoTex1d : register(t6); +RWTexture1D< TYPE > RwTex1d : register(u6); + Texture2D< TYPE > RoTex2d : register(t7); +RWTexture2D< TYPE > RwTex2d : register(u7); + Texture3D< TYPE > RoTex3d : register(t8); +RWTexture3D< TYPE > RwTex3d : register(u8); + +void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -42,13 +49,27 @@ void main(uint ix[2] : IX) { // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) - // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) - // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // 
CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 0 }, i32 5, i1 false) + // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) - // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) - // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + // CHECK-DAG: [[HDLROTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 0 }, i32 6, i1 false) + // CHECK-DAG: [[HDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 1 }, i32 6, i1 false) + // CHECK-DAG: [[HDLROTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 0 }, i32 7, i1 false) + // CHECK-DAG: [[HDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 1 }, i32 7, i1 false) + // CHECK-DAG: [[HDLROTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 0 }, i32 8, i1 false) + // CHECK-DAG: [[HDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 1 }, i32 8, i1 false) - // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK-DAG: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0 + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0 + // CHECK-DAG: [[IX20:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0 + // CHECK-DAG: [[IX21:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1 + // CHECK-DAG: [[IX30:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 0 + // CHECK-DAG: [[IX31:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 1 + // CHECK-DAG: [[IX32:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 2 // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] @@ -56,7 +77,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + TYPE babElt1 = RwByBuf.Load< TYPE >(ix0); // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] @@ -64,14 +85,14 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, 
%dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -80,14 +101,13 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + TYPE stbElt1 = RwStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt2 = RwStBuf[ix[1]]; + TYPE stbElt2 = RwStBuf[ix1]; // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] @@ -95,20 +115,20 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt3 = RoStBuf.Load(ix[0]); + TYPE stbElt3 = RoStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt4 = RoStBuf[ix[1]]; + TYPE stbElt4 = RoStBuf[ix1]; // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -146,7 +166,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt1 = RwTyBuf.Load(ix[0]); + TYPE typElt1 = RwTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -162,7 +182,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt2 = RwTyBuf[ix[1]]; + TYPE typElt2 = RwTyBuf[ix1]; // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -179,7 +199,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt3 = RoTyBuf.Load(ix[0]); + TYPE typElt3 = RoTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -195,7 +215,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 
%{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt4 = RoTyBuf[ix[1]]; + TYPE typElt4 = RoTyBuf[ix1]; // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 @@ -210,5 +230,126 @@ void main(uint ix[2] : IX) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + + // Texture Tests + // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX1]], i32 0, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt2 = RwTex1d[ix0]; + + // CHECK: [[ANHDLROTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX2]], i32 0, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + 
TYPE texElt4 = RwTex2d[ix2]; + + // CHECK: [[ANHDLROTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX3]], i32 0, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt6 = RwTex3d[ix3]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: call void @dx.op.textureStore.[[TY32]](i32 67, %dx.types.Handle [[ANHDLRWTX3]], i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + RwTex3d[ix3] = texElt1 + texElt2 + texElt3 + texElt4 + texElt5 + texElt6; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl new file mode 100644 index 0000000000..7cd54e0387 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl @@ -0,0 +1,152 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer load lowering +// Much of this mirrors buffer-load-store and buffer-agg-load-store + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); + StructuredBuffer< float[2] > ArrBuf : register(t3); + StructuredBuffer< Vector > SVecBuf : register(t4); + StructuredBuffer< float2x2 > MatBuf : register(t5); + StructuredBuffer< Matrix > SMatBuf : register(t6); + +void main(uint ix0 : IX0) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, 
%struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Bab0 = BabBuf.Load< bool2 >(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab1 = (float2)BabBuf.Load< float[2] >(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab2 = BabBuf.Load< Vector >(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab3 = BabBuf.Load< float2x2 >(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab4 = BabBuf.Load< Matrix >(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle [[ANHDL]], i32 [[IX]], <2 x float> + BabBuf.Store< float2 >(ix0+5, select(Bab0, Bab1+Bab2, Bab3+Bab4)); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld0 = VecBuf.Load(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld1 = (float2)ArrBuf.Load(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld2 = SVecBuf.Load(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld3 = MatBuf.Load(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sld4 = SMatBuf.Load(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+5] = select(Sld0, Sld1+Sld2, Sld3+Sld4); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]] + float2 Sss0 = VecBuf[ix0 + 6]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 7 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss1 = (float2)ArrBuf[ix0 + 7]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 8 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss2 = SVecBuf[ix0 + 8].v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 9 + // CHECK: [[HDL:%.*]] = call 
%dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[SS]], i32 1, i32 3) + float2 Sss3 = MatBuf[ix0 + 9][1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 10 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sss4 = SMatBuf[ix0 + 10].m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 11 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+11] = select(Sss0, Sss1+Sss2, Sss3+Sss4); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll new file mode 100644 index 0000000000..6b01120f7b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll @@ -0,0 +1,404 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.StructuredBuffer" = type { [2 x float] } +%"class.StructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.StructuredBuffer >" = type { %class.matrix.float.2.2 } +%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.StructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%dx.types.Handle = type 
{ i8* }
+%dx.types.ResourceProperties = type { i32, i32 }
+
+@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4
+@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4
+@"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" = external global %"class.StructuredBuffer", align 4
+@"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.StructuredBuffer >", align 8
+@"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4
+@"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4
+
+; Function Attrs: nounwind
+define void @main(i32 %ix0) #0 {
+  %1 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A"
+
+  ; Booleans require some conversion after being loaded
+  ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer
+  ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 })
+  ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 3, i32 4)
+  ; CHECK: [[EL0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0
+  ; CHECK: [[EL1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1
+  ; CHECK: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[EL0]], i64 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[EL1]], i64 1
+  ; CHECK: {{%.*}} = icmp ne <2 x i32> [[VEC1]], zeroinitializer
+  %2 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %1)
+  %3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer)
+  %4 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %3, i32 %ix0)
+  %5 = zext <2 x i1> %4 to <2 x i32>
+  %6 = add i32 %ix0, 1
+  %7 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A"
+
+  ; Array loads do so one element at a time.
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %7) + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %10 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %9, i32 %6) + + %11 = getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 0 + %12 = load float, float* %11 + %13 = getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 1 + %14 = load float, float* %13 + %15 = insertelement <2 x float> undef, float %12, i32 0 + %16 = insertelement <2 x float> %15, float %14, i32 1 + %17 = add i32 %ix0, 3 + %18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Vector inside a struct is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %18) + %20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %21 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %20, i32 %17) + %22 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %21, i32 0, i32 2 + %23 = load <2 x float>, <2 x float>* %22, align 4 + %24 = add i32 %ix0, 4 + %25 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %26 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %25) + %27 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %26, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %28 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %27, i32 %24) + %row2col = shufflevector <4 x float> %28, <4 x float> %28, <4 x i32> + %29 = shufflevector <4 x float> %row2col, <4 x float> %row2col, <2 x i32> + %30 = add i32 %ix0, 5 + %31 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %32 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %31) + %33 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %32, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %34 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %33, i32 %30) + %35 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %34, i32 0, i32 1 + %36 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %35, i32 1, i32 3) + %37 = load <2 x float>, <2 x float>* %36 + %38 = fadd <2 x float> %29, %37 + %39 = fadd <2 x float> %16, %23 + %40 = icmp ne <2 x i32> %5, zeroinitializer + %41 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %40, <2 x float> %39, <2 x float> %38) + %42 = load %struct.RWByteAddressBuffer, 
%struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + %43 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %42) + %44 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %43, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %44, i32 %ix0, <2 x float> %41) + %45 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %45) + %47 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %46, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %48 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %47, i32 %ix0) + %49 = add i32 %ix0, 1 + %50 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %51 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %50) + %52 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %51, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %53 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %52, i32 %49) + %54 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 0 + %55 = load float, float* %54 + %56 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 1 + %57 = load float, float* %56 + %58 = insertelement <2 x float> undef, float %55, i32 0 + %59 = insertelement <2 x float> %58, float %57, i32 1 + %60 = add i32 %ix0, 3 + %61 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %62 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %61) + %63 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %62, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %64 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %63, i32 %60) + %65 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %64, i32 0, i32 2 + %66 = load <2 x float>, <2 x float>* %65, align 4 + %67 = add i32 %ix0, 4 + %68 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %71 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %70, i32 %67) + %row2col1 = shufflevector <4 x float> %71, <4 x float> %71, <4 x i32> + %72 = shufflevector <4 x float> %row2col1, <4 x float> %row2col1, <2 x i32> + %73 = add i32 %ix0, 5 + %74 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %75 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %74) + %76 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %75, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %77 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %76, i32 %73) + %78 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %77, i32 0, i32 1 + %79 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %78, i32 1, i32 3) + %80 = load <2 x float>, <2 x float>* %79 + %81 = fadd <2 x float> %72, %80 + %82 = fadd <2 x float> %59, %66 + %83 = fcmp une <2 x float> %48, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call 
%dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %84 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %83, <2 x float> %82, <2 x float> %81) + %85 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %86 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %85) + %87 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %86, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %88 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %87, i32 %ix0) + store <2 x float> %84, <2 x float>* %88 + %89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %89) + %91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %90, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %92 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %91, i32 %ix0) + %93 = load <2 x float>, <2 x float>* %92 + %94 = add i32 %ix0, 1 + %95 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %96 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %95) + %97 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %96, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %98 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %97, i32 %94) + %99 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 0 + %100 = load float, float* %99 + %101 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 1 + %102 = load float, float* %101 + %103 = insertelement <2 x float> undef, float %100, i32 0 + %104 = insertelement <2 x float> %103, float %102, i32 1 + %105 = add i32 %ix0, 3 + %106 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct is a simple load. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %107 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %106) + %108 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %107, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %109 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %108, i32 %105) + %110 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %109, i32 0, i32 2 + %111 = load <2 x float>, <2 x float>* %110, align 4 + %112 = add i32 %ix0, 4 + %113 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; Subscripted matrices get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 12, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %113) + %115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %114, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %116 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %115, i32 %112) + %117 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %116, i32 1, i32 3) + %118 = load <2 x float>, <2 x float>* %117 + %119 = add i32 %ix0, 5 + %120 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %120) + %122 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %121, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %122, i32 %119) + %124 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %123, i32 0, i32 1 + %125 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %124, i32 1, i32 3) + %126 = load <2 x float>, <2 x float>* %125 + %127 = fadd <2 x float> %118, %126 + %128 = fadd <2 x float> %104, %111 + %129 = fcmp une <2 x float> %93, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %130 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %129, <2 x float> %128, <2 x float> %127) + %131 = add i32 %ix0, 1 + %132 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %133 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %132) + %134 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %133, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %135 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %134, i32 %131) + store <2 x float> %130, <2 x float>* %135 + ret void +} + +declare <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32, %class.matrix.float.2.2*, i32, i32) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* 
(i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #1 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} 
+!dx.entryPoints = !{!50} +!dx.fnprops = !{!63} +!dx.options = !{!64, !65} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.StructuredBuffer" undef, !12, %"class.StructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.StructuredBuffer >" undef, !29, %"class.StructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 = !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} +!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{!52, !60, null, null} +!52 = !{!53, !55, !57, !59} +!53 = !{i32 0, %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i32 0, !54} +!54 = !{i32 1, i32 8} +!55 = !{i32 1, %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i32 0, !56} +!56 = !{i32 1, i32 32} +!57 = !{i32 2, %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i32 0, !58} +!58 = !{i32 1, i32 16} +!59 = !{i32 3, %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i32 0, !56} +!60 = !{!61, !62} +!61 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!62 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !54} +!63 = !{void (i32)* @main, i32 1} +!64 = !{i32 64} +!65 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl new file mode 100644 index 0000000000..47355d633f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer/texture load lowering + +RWBuffer< bool2 > TyBuf : 
register(u1); +Texture2DMS< bool2 > Tex2dMs : register(t2); + +Texture1D< float2 > Tex1d : register(t3); +Texture2D< float2 > Tex2d : register(t4); +Texture3D< float2 > Tex3d : register(t5); +Texture2DArray< float2 > Tex2dArr : register(t6); + +RWBuffer< float2 > OutBuf : register(u7); + +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3, uint4 ix4 : IX4) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb0 = TyBuf.Load(ix1 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb1 = TyBuf[ix1 + 2]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef), + // CHECK: call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]] + bool2 TxMs0 = Tex2dMs.Load(ix2 + 3, ix1); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + bool2 TxMs1 = Tex2dMs[ix2 + 4]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x 
float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx1d0 = Tex1d.Load(ix2 + 5); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Tx1d1 = Tex1d[ix1 + 6]; + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2d0 = Tex2d.Load(ix3 + 7); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx2d1 = Tex2d[ix2 + 8]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx3d0 = Tex3d.Load(ix4 + 9); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx3d1 = Tex3d[ix3 + 10]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call 
%dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx2da0 = Tex2dArr.Load(ix4 + 11); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2da1 = Tex2dArr[ix3 + 12]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 13 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+13] = select(Tyb0, Tx1d0, Tx1d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 14 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+14] = select(Tyb1, Tx2d0, Tx2d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 15 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+15] = select(TxMs0, Tx3d0, Tx3d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 16 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+16] = select(TxMs1, Tx2da0, Tx2da1); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll new file mode 100644 index 0000000000..3ecb28644c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll @@ -0,0 +1,346 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <2 x i32> } +%"class.Texture2DMS, 0>" = type { <2 x i32>, %"class.Texture2DMS, 0>::sample_type" } +%"class.Texture2DMS, 0>::sample_type" = type { i32 } +%"class.Texture1D >" = type { <2 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.Texture2D >" = type { <2 x float>, %"class.Texture2D >::mips_type" } +%"class.Texture2D >::mips_type" = type { i32 } +%"class.Texture3D >" = type { <2 x float>, %"class.Texture3D >::mips_type" } +%"class.Texture3D >::mips_type" = type { i32 } +%"class.Texture2DArray >" = type { <2 x float>, %"class.Texture2DArray >::mips_type" } +%"class.Texture2DArray >::mips_type" = type { i32 } +%"class.RWBuffer >" = type { <2 x float> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.Texture2DMS, 0>", align 4 +@"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" = external global %"class.Texture1D >", align 4 +@"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" = external global %"class.Texture2D >", align 4 +@"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" = external global %"class.Texture3D >", align 4 +@"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" = external global %"class.Texture2DArray >", align 4 +@"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" = external global %"class.RWBuffer >", align 4 + +; Function Attrs: nounwind +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3, <4 x i32> %ix4) #0 { + ; CHECK: [[PIX:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 1 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %1 = add i32 %ix1, 1 + %2 = load %"class.RWBuffer >", %"class.RWBuffer >"* 
@"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %2) + %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %5 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %4, i32 %1) + + %6 = zext <2 x i1> %5 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 2 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %7 = add i32 %ix1, 2 + %8 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %9 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %8) + %10 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %9, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %11 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %10, i32 %7) + %12 = load <2 x i32>, <2 x i32>* %11 + + %13 = icmp ne <2 x i32> %12, zeroinitializer + %14 = zext <2 x i1> %13 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[PIX]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %15 = add <2 x i32> %ix2, + %16 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %16) + %18 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %17, %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %19 = call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle %18, <2 x i32> %15, i32 %ix1) + %20 = zext <2 x i1> %19 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %21 = add <2 x i32> %ix2, + %22 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %23 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %22) + %24 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %23, %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %25 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %24, <2 x i32> %21) + %26 = load <2 x i32>, <2 x i32>* %25 + + %27 = icmp ne <2 x i32> %26, zeroinitializer + %28 = zext <2 x i1> %27 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX1]], i32 [[IX0]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %29 = add <2 x i32> %ix2, + %30 = load %"class.Texture1D >", %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %30) + %32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %31, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %33 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, 
%dx.types.Handle %32, <2 x i32> %29) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 6 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %34 = add i32 %ix1, 6 + %35 = load %"class.Texture1D >", %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %35) + %37 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %36, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %38 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %37, i32 %34) + %39 = load <2 x float>, <2 x float>* %38 + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX2]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %40 = add <3 x i32> %ix3, + %41 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %41) + %43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %42, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %44 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, %dx.types.Handle %43, <3 x i32> %40) + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %45 = add <2 x i32> %ix2, + %46 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %46) + %48 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %47, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %49 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %48, <2 x i32> %45) + %50 = load <2 x float>, <2 x float>* %49 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %51 = add <4 x i32> %ix4, + %52 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %52) + %54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle %53, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %55 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %54, <4 x i32> %51) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %56 = add <3 x i32> %ix3, + %57 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %57) + %59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle %58, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %60 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %59, <3 x i32> %56) + %61 = load <2 x float>, <2 x float>* %60 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %62 = add <4 x i32> %ix4, + %63 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %63) + %65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %64, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %66 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %65, <4 x i32> %62) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %67 = add <3 x i32> %ix3, + %68 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %70, <3 x i32> %67) + %72 = load <2 x float>, <2 x float>* %71 + + %73 = icmp ne <2 x i32> %6, zeroinitializer + %74 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %73, <2 x float> %33, <2 x float> %39) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 13 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef, + %75 = add i32 %ix1, 13 + %76 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %77 = call %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %76) + %78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %77, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %79 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %78, i32 %75) + store <2 x float> %74, <2 x float>* %79 + + %80 = icmp ne <2 x i32> %14, zeroinitializer + %81 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %80, <2 x float> %44, <2 x float> %50) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 14 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %82 = add i32 %ix1, 14 + %83 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %84 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %83) + %85 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %84, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %86 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %85, i32 %82) + store <2 x float> %81, <2 x float>* %86 + + %87 = icmp ne <2 x i32> %20, zeroinitializer + %88 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %87, <2 x float> %55, <2 x float> %61) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 15 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %89 = add i32 %ix1, 15 + %90 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %91 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %90) + %92 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %91, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %93 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %92, i32 %89) + store <2 x float> %88, <2 x float>* %93 + + %94 = icmp ne <2 x i32> %28, zeroinitializer + %95 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %94, <2 x float> %66, <2 x float> %72) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 16 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, 
%"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %96 = add i32 %ix1, 16 + %97 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %98 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %97) + %99 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %98, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %100 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %99, i32 %96) + store <2 x float> %95, <2 x float>* %100 + + ret void +} + +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32, %"class.Texture2DMS, 0>") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2DMS, 0>") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32, %"class.Texture1D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture1D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32, %"class.Texture2D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32, %dx.types.Handle, <4 x i32>) #1 +declare %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32, %"class.Texture3D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture3D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32, %"class.Texture2DArray >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2DArray >") #2 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!22} +!dx.fnprops = !{!35} +!dx.options = !{!36, !37} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16, !19} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{i32 0, !20, !21} +!20 = !{i32 4, !"IX4", i32 7, i32 5} +!21 = !{i32 4} +!22 = !{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !"main", null, !23, null} +!23 = !{!24, !32, null, null} +!24 = !{!25, !27, !29, !30, !31} +!25 = !{i32 0, %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A", !"Tex2dMs", i32 0, i32 2, i32 1, i32 3, i32 0, !26} +!26 = !{i32 0, i32 5} +!27 = !{i32 1, %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A", !"Tex1d", i32 0, i32 3, i32 1, i32 1, i32 0, !28} +!28 = !{i32 0, i32 9} +!29 = !{i32 2, %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A", !"Tex2d", i32 0, i32 4, i32 1, i32 2, i32 0, !28} +!30 = !{i32 3, %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A", !"Tex3d", i32 0, i32 5, i32 1, i32 4, i32 0, !28} +!31 = !{i32 4, %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A", !"Tex2dArr", i32 0, i32 6, i32 1, i32 7, i32 0, !28} +!32 = !{!33, !34} +!33 = !{i32 0, %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"TyBuf", i32 0, i32 1, i32 1, i32 10, i1 false, i1 false, i1 false, !26} +!34 = !{i32 1, %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A", !"OutBuf", i32 0, i32 7, i32 1, i32 10, i1 false, i1 false, i1 false, !28} +!35 = 
!{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, i32 1} +!36 = !{i32 64} +!37 = !{i32 -1} From 9ba9689e22c6861279bef0463baf75adac4aec18 Mon Sep 17 00:00:00 2001 From: Junda Liu Date: Tue, 18 Mar 2025 22:42:34 +0800 Subject: [PATCH 30/88] [SPIR-V] Set RValue for the result of bitfield extract emulation (#7200) Otherwise, the result of bitfield extract emulation is treated as LValue and may have an extra OpLoad generated. --- tools/clang/lib/SPIRV/SpirvBuilder.cpp | 2 ++ ...p.struct.access.bitfield.sized.rvalue.hlsl | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index b1e7388f16..1275e2b252 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -994,6 +994,8 @@ SpirvInstruction *SpirvBuilder::createEmulatedBitFieldExtract( rightShift->setResultType(baseType); } + rightShift->setRValue(true); + return rightShift; } diff --git a/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl new file mode 100644 index 0000000000..414d8a638c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T cs_6_2 -E main -spirv -fcgl -enable-16bit-types %s | FileCheck %s + +struct S1 +{ + uint16_t a : 8; +}; + +S1 foo() +{ + return (S1)0; +} + +[numthreads(1, 1, 1)] +void main() { + uint16_t test = foo().a; +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_ushort %temp_var_S1 %int_0 +// CHECK: [[raw:%[0-9]+]] = OpLoad %ushort [[ptr]] +// CHECK: [[tmp:%[0-9]+]] = OpShiftLeftLogical %ushort [[raw]] %uint_8 +// CHECK: [[out:%[0-9]+]] = OpShiftRightLogical %ushort [[tmp]] %uint_8 +// CHECK-NOT: OpLoad %ushort [[out]] +// CHECK: OpStore %test [[out]] +} From 503ef3c2bc198b1e844da53c117402d45302cdd1 Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 18 Mar 2025 10:25:04 -0500 Subject: [PATCH 31/88] Switch from tj-actions/changed-files to step-security/changed-files (#7217) Aligning with upstream LLVM's action definition. --- .github/workflows/clang-format-checker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index 7e39a5b0be..1c69d6de86 100644 --- a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -19,10 +19,10 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v41 + uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1 with: separator: "," - fetch_depth: 100 # Fetches only the last 10 commits + skip_initial_fetch: true - name: "Listed files" env: From 3ddf29bd4384cd2b81a6b04c71ca9e8f3160714f Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Tue, 18 Mar 2025 10:13:17 -0700 Subject: [PATCH 32/88] Disallow swizzling on long vectors (#7215) This PR addresses https://github.com/microsoft/DirectXShaderCompiler/issues/7194 by adding a new error string in DiagnosticSemaKinds.td and emitting it in SemaHLSL.cpp. @pow2clk implemented most of this in his [fork](https://github.com/microsoft/DirectXShaderCompiler/commit/a41e0a69db6fd072ffe8f1c811bf3dadcc2ab8fe). I'm just helping to finish it. **How verified:** 1. 
Several new test cases were added and verified locally 2. Ran hcttest locally --- .../clang/Basic/DiagnosticSemaKinds.td | 2 ++ tools/clang/lib/Sema/SemaHLSL.cpp | 3 +++ .../hlsl/types/invalid-longvec-swizzle.hlsl | 27 +++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index b8a772b3a8..16ff7777a7 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7549,6 +7549,8 @@ def err_hlsl_vector_element_index_out_of_bounds: Error< "vector element index '%0' is out of bounds">; def err_hlsl_vector_member_too_many_positions: Error< "more than four positions are referenced in '%0'">; +def err_hlsl_vector_member_on_long_vector: Error< + "invalid swizzle '%0' on vector of over 4 elements.">; def err_hlsl_missing_type_specifier : Error< // Patterened after err_missing_type_specifier "HLSL requires a type specifier for all declarations">; def err_hlsl_multiple_concrete_bases : Error< diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 031e49408f..66cbea12ce 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -8643,6 +8643,9 @@ ExprResult HLSLExternalSource::LookupVectorMemberExprForHLSL( llvm_unreachable("Unknown VectorMemberAccessError value"); } + if (colCount > 4) + msg = diag::err_hlsl_vector_member_on_long_vector; + if (msg != 0) { m_sema->Diag(MemberLoc, msg) << memberText; diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl new file mode 100644 index 0000000000..28b4a52158 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=float +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=bool +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=uint64_t +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=double +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=float16_t +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=int16_t + +export +vector doit(vector vec5) { + vec5.x = 1; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return vec5.xyw; // expected-error {{invalid swizzle 'xyw' on vector of over 4 elements.}} +} + +export +TYPE arr_to_vec(TYPE arr[5]) { + + TYPE val = (vector(arr, 1)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + TYPE val2 = ((vector)arr).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + return val; +} + +export TYPE lv_ctor(TYPE s) { + TYPE ret = (vector(1, 2, 3, 4, 5, s)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return ret; +} \ No newline at end of file From 6475f98147604c315b81302f324f779442a00cd2 Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 18 Mar 2025 15:23:45 -0500 Subject: [PATCH 33/88] Actually fix the changed-files workflow (#7226) This time I actually tested the workflow over on this PR: https://github.com/llvm-beanz/DirectXShaderCompiler/pull/6 --- .github/workflows/clang-format-checker.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index 1c69d6de86..d1887e4519 100644 --- 
a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -13,9 +13,16 @@ jobs: pull-requests: write steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - fetch-depth: 2 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Checkout through merge base + uses: rmacklin/fetch-through-merge-base@bfe4d03a86f9afa52bc1a70e9814fc92a07f7b75 # v0.3.0 + with: + base_ref: ${{ github.event.pull_request.base.ref }} + head_ref: ${{ github.event.pull_request.head.sha }} + deepen_length: 500 - name: Get changed files id: changed-files From 454bbf480805cae25173159465eb4769422dee5b Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 10:20:37 -0400 Subject: [PATCH 34/88] Fix typo in SPIR-V.rst (#7224) Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7176 --- docs/SPIR-V.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 072a2fe9c1..9a8150a0e8 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -312,7 +312,7 @@ Supported extensions * SPV_NV_mesh_shader * SPV_KHR_ray_query * SPV_EXT_shader_image_int64 -* SPV_KHR_fragment_shading_barycentric +* SPV_KHR_fragment_shader_barycentric * SPV_KHR_physical_storage_buffer * SPV_KHR_vulkan_memory_model * SPV_NV_compute_shader_derivatives From 6701eeddd5c759a277ee40329ea746f4984748b1 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 13:01:38 -0400 Subject: [PATCH 35/88] [SPIRV] Handle a cast to void (#7227) Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7134 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 6 ++++-- .../clang/test/CodeGenSPIRV/cast.to.void.hlsl | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 3a67257da7..3aaa91d50a 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -3657,14 +3657,16 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } + case CastKind::CK_ToVoid: + return nullptr; default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } diff --git a/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl new file mode 100644 index 0000000000..19a37d071c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc dxc -T cs_6_6 -E Main -spirv %s -fcgl | FileCheck %s + + +// Make sure no code is generated for the cast to void. + +// CHECK: %src_Main = OpFunction %void None +// CHECK-NEXT: OpLabel +// CHECK-NEXT: %x = OpVariable +// CHECK-NEXT: OpStore %x %false +// CHECK-NEXT: OpReturn +// CHECK-NEXT: OpFunctionEnd + +[numthreads(1, 1, 1)] +void Main() +{ + bool x = false; + (void)x; +} From 0958e064380f7a450974c09dd6ea6e77ce10a523 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 13:03:13 -0400 Subject: [PATCH 36/88] [SPIRV] Don't assume entry points are at the start of the worklist. 
(#7225) Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7161 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 22 ++++++++----------- .../lib.fn.export.with.entrypoint.hlsl | 19 ++++++++++++++++ 2 files changed, 28 insertions(+), 13 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 3aaa91d50a..d858e2caca 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -809,21 +809,17 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) { spvBuilder.setMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450); - // Even though the 'workQueue' grows due to the above loop, the first - // 'numEntryPoints' entries in the 'workQueue' are the ones with the HLSL - // 'shader' attribute, and must therefore be entry functions. - assert(numEntryPoints <= workQueue.size()); - - for (uint32_t i = 0; i < numEntryPoints; ++i) { + for (uint32_t i = 0; i < workQueue.size(); ++i) { // TODO: assign specific StageVars w.r.t. to entry point const FunctionInfo *entryInfo = workQueue[i]; - assert(entryInfo->isEntryFunction); - spvBuilder.addEntryPoint( - getSpirvShaderStage( - entryInfo->shaderModelKind, - featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), - entryInfo->entryFunction, getEntryPointName(entryInfo), - getInterfacesForEntryPoint(entryInfo->entryFunction)); + if (entryInfo->isEntryFunction) { + spvBuilder.addEntryPoint( + getSpirvShaderStage( + entryInfo->shaderModelKind, + featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), + entryInfo->entryFunction, getEntryPointName(entryInfo), + getInterfacesForEntryPoint(entryInfo->entryFunction)); + } } // Add Location decorations to stage input/output variables. diff --git a/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl new file mode 100644 index 0000000000..0ab965aded --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_6 -E main -fspv-target-env=universal1.5 -fcgl %s -spirv | FileCheck %s + +// CHECK: OpEntryPoint MissKHR %miss "miss" %payload +// CHECK: OpDecorate %func LinkageAttributes "func" Export + + +struct RayPayload +{ + uint a; +}; + +export void func() +{ +} + +[shader("miss")] +void miss(inout RayPayload payload) +{ +} From b2bcf21a62566fed959a9091abb6ace4751071f2 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 19 Mar 2025 17:07:57 -0400 Subject: [PATCH 37/88] Revert "[SPIRV] Use copy-in/copy-out for non-declaration (#7127)" (#7223) This did not solve all of the cases for the issue it was fixing. A new fix was done in the inliner in spirv-opt. This change is no longer needed. This reverts commit 8967dacb03f1d95fc0292aa7a2e48b0acf50dcd9. 
--- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 8 +------ .../cs.groupshared.function-param.out.hlsl | 6 +---- .../CodeGenSPIRV/fn.fixfuncall-compute.hlsl | 10 ++++---- .../CodeGenSPIRV/fn.fixfuncall-linkage.hlsl | 8 +++---- .../fn.param.inout.storage-class.hlsl | 9 +++---- .../CodeGenSPIRV/fn.param.inout.vector.hlsl | 8 +------ .../CodeGenSPIRV/fn.param.isomorphism.hlsl | 24 +++++-------------- 7 files changed, 21 insertions(+), 52 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index d858e2caca..557768f59a 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -3100,12 +3100,6 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { argInfo && argInfo->getStorageClass() != spv::StorageClass::Function && isResourceType(paramType); - // HLSL requires that the parameters be copied in and out from temporaries. - // This looks for cases where the copy can be elided. To generate valid - // SPIR-V, the argument must be a memory declaration. - // - // - // If argInfo is nullptr and argInst is a rvalue, we do not have a proper // pointer to pass to the function. we need a temporary variable in that // case. @@ -3114,7 +3108,7 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { // create a temporary variable for it because the function definition // expects are point-to-pointer argument for resources, which will be // resolved by legalization. - if ((argInfo || (argInst && argInst->getopcode() == spv::Op::OpVariable)) && + if ((argInfo || (argInst && !argInst->isRValue())) && canActAsOutParmVar(param) && !isArgGlobalVarWithResourceType && paramTypeMatchesArgType(paramType, arg->getType())) { // Based on SPIR-V spec, function parameter must be always Function diff --git a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl index 3ec0ad447e..8d0195d672 100644 --- a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl +++ b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl @@ -28,14 +28,10 @@ groupshared S D; [numthreads(1,1,1)] void main() { // CHECK: %E = OpVariable %_ptr_Function_int Function -// CHECK-NEXT: [[TempVar:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function - int E; // CHECK: [[A:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int %A %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[A]] -// CHECK-NEXT: OpStore [[TempVar]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[TempVar]] %B %C %D %E +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[A]] %B %C %D %E foo(A[0], B, C, D, E); A[0] = A[0] | B | C | D.a | E; } diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl index 70bf50abc6..dba7cd00ce 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl @@ -7,19 +7,19 @@ float4 foo(inout float f0, inout int f1) return 0; } -// CHECK-DAG: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function -// CHECK-DAG: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function +// CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Uniform_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: 
[[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %v4float %foo [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] struct Stru { int x; diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl index 6acd104aa3..5977fc454a 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl @@ -6,19 +6,19 @@ RWStructuredBuffer< float4 > output : register(u1); // CHECK: OpDecorate %main LinkageAttributes "main" Export // CHECK: %main = OpFunction %int None -// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %void %func [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] [noinline] void func(inout float f0, inout int f1) { diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl index 4d75d27fa8..d0e771e834 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl @@ -11,13 +11,10 @@ void main(float input : INPUT) { // CHECK: %param_var_a = OpVariable %_ptr_Function_float Function // CHECK: [[val:%[0-9]+]] = OpLoad %float %input -// CHECK: OpStore %param_var_a [[val]] +// CHECK: OpStore %param_var_a [[val]] // CHECK: [[p0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float [[p0]] -// CHECK-NEXT: OpStore [[temp0:%[a-zA-Z0-9_]+]] [[ld]] // CHECK: [[p1:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_1 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float %32 -// CHECK-NEXT: OpStore [[temp1:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK: OpFunctionCall %void %foo %param_var_a [[temp0]] [[temp1]] + +// CHECK: OpFunctionCall %void %foo %param_var_a [[p0]] [[p1]] foo(input, Data[0], Data[1]); } diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl index 5641923aaa..bda2183057 100644 --- 
a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl @@ -18,9 +18,7 @@ float4 main() : C { float4 val; // CHECK: [[z_ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_float %val %int_2 -// CHECK: [[ld:%[0-9]+]] = OpLoad %float [[z_ptr]] -// CHECK: OpStore %param_var_w [[ld]] -// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z %param_var_w +// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z [[z_ptr]] // CHECK-NEXT: [[y:%[0-9]+]] = OpLoad %v3float %param_var_y // CHECK-NEXT: [[old:%[0-9]+]] = OpLoad %v4float %val // Write to val.zwx: @@ -39,10 +37,6 @@ float4 main() : C { // CHECK-NEXT: [[old_0:%[0-9]+]] = OpLoad %v4float %val // CHECK-NEXT: [[new_0:%[0-9]+]] = OpVectorShuffle %v4float [[old_0]] [[z]] 4 5 2 3 // CHECK-NEXT: OpStore %val [[new_0]] - // Write to val.z: -// CHECK-NEXT: [[new:%[0-9]+]] = OpLoad %float %param_var_w -// CHECK-NEXT: OpStore [[z_ptr]] [[new]] - bar(val, val.zwx, val.xy, val.z); return MyRWBuffer[0]; diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl index 3f890099f5..a4ad925f77 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl @@ -62,11 +62,7 @@ void main() { fn.incr(); // CHECK: [[rwsb_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_R %rwsb %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %R [[rwsb_0]] -// CHECK-NEXT: [[ex:%[0-9]+]] = OpCompositeExtract %int [[ld]] 0 -// CHECK-NEXT: [[v:%[0-9]+]] = OpCompositeConstruct %R_0 [[ex]] -// CHECK-NEXT: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[v]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[rwsb_0]] decr(rwsb[0]); // CHECK: OpFunctionCall %void %decr2 %gs @@ -91,29 +87,21 @@ void main() { fnarr[0].incr(); // CHECK: [[gsarr_0:%[0-9]+]] = OpAccessChain %_ptr_Workgroup_S %gsarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[gsarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[gsarr_0]] decr2(gsarr[0]); // CHECK: [[starr_0:%[0-9]+]] = OpAccessChain %_ptr_Private_S %starr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[starr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[starr_0]] decr2(starr[0]); // CHECK: [[fnarr_0:%[0-9]+]] = OpAccessChain %_ptr_Function_S %fnarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[fnarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[fnarr_0]] decr2(fnarr[0]); // CHECK: [[arr:%[0-9]+]] = OpAccessChain %_ptr_Function_int %arr %int_0 // CHECK-NEXT: [[arr_0:%[0-9]+]] = OpLoad %int [[arr]] // CHECK-NEXT: [[arr_1:%[0-9]+]] = OpIAdd %int [[arr_0]] %int_1 -// CHECK-NEXT: OpStore [[arr]] [[arr_1]] -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[arr]] -// CHECK-NEXT: OpStore [[TempVar:%[0-9a-zA-Z_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[TempVar]] +// CHECK-NEXT: OpStore [[arr]] [[arr_1]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[arr]] int_decr(++arr[0]); } From 
a0932fa0817dcd93f1c527f04cbaec0f282d56c6 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 19 Mar 2025 14:51:12 -0700 Subject: [PATCH 38/88] Add /bigobj compile option to MSVC build (#7228) When targeting arm64 Debug, this error is detected: `libclang\dxcrewriteunused.cpp(1,1): error C1128: number of sections exceeded object file format limit: compile with /bigobj` This PR adds a compile option for the folder that contains dxcrewriteunused.cpp, so that the limit on the number of sections is increased, and compilation may succeed. --- tools/clang/tools/libclang/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/clang/tools/libclang/CMakeLists.txt b/tools/clang/tools/libclang/CMakeLists.txt index 1ef0c8ecd9..ed49cbaf44 100644 --- a/tools/clang/tools/libclang/CMakeLists.txt +++ b/tools/clang/tools/libclang/CMakeLists.txt @@ -119,6 +119,7 @@ if(MSVC) # Each functions is exported as "dllexport" in include/clang-c. # KB835326 set(LLVM_EXPORTED_SYMBOL_FILE) + add_compile_options(/bigobj) endif() # HLSL Change Starts From eb0234398b7665c4084b71dd1f1f662794128e20 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 20 Mar 2025 09:12:34 -0700 Subject: [PATCH 39/88] NFC: Make hlsl::IntrinsicOp enum values stable (#7231) This change makes hlsl::IntrinsicOp enum values stable by: - adding hlsl_intrinsic_opcodes.json to capture assigned indices - adds this to the files generated by hctgen - generation assigns new indices after the last index - hlsl::IntrinsicOp enum values have explicit assignments - removes ENABLE_SPIRV_CODEGEN ifdefs around opcode definitions and lowering table entries to keep these stable whether or not the spirv build setting is enabled. Fixes #7230 --- CMakeLists.txt | 2 + include/dxc/HlslIntrinsicOp.h | 730 +++++++++++++------------- lib/HLSL/HLOperationLower.cpp | 8 +- utils/hct/CMakeLists.txt | 3 + utils/hct/hctdb.py | 35 +- utils/hct/hctdb_instrhelp.py | 39 +- utils/hct/hctgen.py | 10 + utils/hct/hlsl_intrinsic_opcodes.json | 363 +++++++++++++ 8 files changed, 801 insertions(+), 389 deletions(-) create mode 100644 utils/hct/CMakeLists.txt create mode 100644 utils/hct/hlsl_intrinsic_opcodes.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f7db99784..74244c1d58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -686,6 +686,8 @@ add_subdirectory(include/dxc) # really depend on anything else in the build it is safe. 
list(APPEND LLVM_COMMON_DEPENDS HCTGen) +add_subdirectory(utils/hct) + if(EXISTS "${LLVM_MAIN_SRC_DIR}/external") add_subdirectory(external) # SPIRV change endif() diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index fcc9bb11b1..41c72d1a51 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -5,378 +5,366 @@ #pragma once namespace hlsl { enum class IntrinsicOp { - IOP_AcceptHitAndEndSearch, - IOP_AddUint64, - IOP_AllMemoryBarrier, - IOP_AllMemoryBarrierWithGroupSync, - IOP_AllocateRayQuery, - IOP_Barrier, - IOP_CallShader, - IOP_CheckAccessFullyMapped, - IOP_CreateResourceFromHeap, - IOP_D3DCOLORtoUBYTE4, - IOP_DeviceMemoryBarrier, - IOP_DeviceMemoryBarrierWithGroupSync, - IOP_DispatchMesh, - IOP_DispatchRaysDimensions, - IOP_DispatchRaysIndex, - IOP_EvaluateAttributeAtSample, - IOP_EvaluateAttributeCentroid, - IOP_EvaluateAttributeSnapped, - IOP_GeometryIndex, - IOP_GetAttributeAtVertex, - IOP_GetRemainingRecursionLevels, - IOP_GetRenderTargetSampleCount, - IOP_GetRenderTargetSamplePosition, - IOP_GroupMemoryBarrier, - IOP_GroupMemoryBarrierWithGroupSync, - IOP_HitKind, - IOP_IgnoreHit, - IOP_InstanceID, - IOP_InstanceIndex, - IOP_InterlockedAdd, - IOP_InterlockedAnd, - IOP_InterlockedCompareExchange, - IOP_InterlockedCompareExchangeFloatBitwise, - IOP_InterlockedCompareStore, - IOP_InterlockedCompareStoreFloatBitwise, - IOP_InterlockedExchange, - IOP_InterlockedMax, - IOP_InterlockedMin, - IOP_InterlockedOr, - IOP_InterlockedXor, - IOP_IsHelperLane, - IOP_NonUniformResourceIndex, - IOP_ObjectRayDirection, - IOP_ObjectRayOrigin, - IOP_ObjectToWorld, - IOP_ObjectToWorld3x4, - IOP_ObjectToWorld4x3, - IOP_PrimitiveIndex, - IOP_Process2DQuadTessFactorsAvg, - IOP_Process2DQuadTessFactorsMax, - IOP_Process2DQuadTessFactorsMin, - IOP_ProcessIsolineTessFactors, - IOP_ProcessQuadTessFactorsAvg, - IOP_ProcessQuadTessFactorsMax, - IOP_ProcessQuadTessFactorsMin, - IOP_ProcessTriTessFactorsAvg, - IOP_ProcessTriTessFactorsMax, - IOP_ProcessTriTessFactorsMin, - IOP_QuadAll, - IOP_QuadAny, - IOP_QuadReadAcrossDiagonal, - IOP_QuadReadAcrossX, - IOP_QuadReadAcrossY, - IOP_QuadReadLaneAt, - IOP_RayFlags, - IOP_RayTCurrent, - IOP_RayTMin, - IOP_ReportHit, - IOP_SetMeshOutputCounts, - IOP_TraceRay, - IOP_WaveActiveAllEqual, - IOP_WaveActiveAllTrue, - IOP_WaveActiveAnyTrue, - IOP_WaveActiveBallot, - IOP_WaveActiveBitAnd, - IOP_WaveActiveBitOr, - IOP_WaveActiveBitXor, - IOP_WaveActiveCountBits, - IOP_WaveActiveMax, - IOP_WaveActiveMin, - IOP_WaveActiveProduct, - IOP_WaveActiveSum, - IOP_WaveGetLaneCount, - IOP_WaveGetLaneIndex, - IOP_WaveIsFirstLane, - IOP_WaveMatch, - IOP_WaveMultiPrefixBitAnd, - IOP_WaveMultiPrefixBitOr, - IOP_WaveMultiPrefixBitXor, - IOP_WaveMultiPrefixCountBits, - IOP_WaveMultiPrefixProduct, - IOP_WaveMultiPrefixSum, - IOP_WavePrefixCountBits, - IOP_WavePrefixProduct, - IOP_WavePrefixSum, - IOP_WaveReadLaneAt, - IOP_WaveReadLaneFirst, - IOP_WorldRayDirection, - IOP_WorldRayOrigin, - IOP_WorldToObject, - IOP_WorldToObject3x4, - IOP_WorldToObject4x3, - IOP_abort, - IOP_abs, - IOP_acos, - IOP_all, - IOP_and, - IOP_any, - IOP_asdouble, - IOP_asfloat, - IOP_asfloat16, - IOP_asin, - IOP_asint, - IOP_asint16, - IOP_asuint, - IOP_asuint16, - IOP_atan, - IOP_atan2, - IOP_ceil, - IOP_clamp, - IOP_clip, - IOP_cos, - IOP_cosh, - IOP_countbits, - IOP_cross, - IOP_ddx, - IOP_ddx_coarse, - IOP_ddx_fine, - IOP_ddy, - IOP_ddy_coarse, - IOP_ddy_fine, - IOP_degrees, - IOP_determinant, - IOP_distance, - IOP_dot, - IOP_dot2add, - IOP_dot4add_i8packed, - 
IOP_dot4add_u8packed, - IOP_dst, - IOP_exp, - IOP_exp2, - IOP_f16tof32, - IOP_f32tof16, - IOP_faceforward, - IOP_firstbithigh, - IOP_firstbitlow, - IOP_floor, - IOP_fma, - IOP_fmod, - IOP_frac, - IOP_frexp, - IOP_fwidth, - IOP_isfinite, - IOP_isinf, - IOP_isnan, - IOP_ldexp, - IOP_length, - IOP_lerp, - IOP_lit, - IOP_log, - IOP_log10, - IOP_log2, - IOP_mad, - IOP_max, - IOP_min, - IOP_modf, - IOP_msad4, - IOP_mul, - IOP_normalize, - IOP_or, - IOP_pack_clamp_s8, - IOP_pack_clamp_u8, - IOP_pack_s8, - IOP_pack_u8, - IOP_pow, - IOP_printf, - IOP_radians, - IOP_rcp, - IOP_reflect, - IOP_refract, - IOP_reversebits, - IOP_round, - IOP_rsqrt, - IOP_saturate, - IOP_select, - IOP_sign, - IOP_sin, - IOP_sincos, - IOP_sinh, - IOP_smoothstep, - IOP_source_mark, - IOP_sqrt, - IOP_step, - IOP_tan, - IOP_tanh, - IOP_tex1D, - IOP_tex1Dbias, - IOP_tex1Dgrad, - IOP_tex1Dlod, - IOP_tex1Dproj, - IOP_tex2D, - IOP_tex2Dbias, - IOP_tex2Dgrad, - IOP_tex2Dlod, - IOP_tex2Dproj, - IOP_tex3D, - IOP_tex3Dbias, - IOP_tex3Dgrad, - IOP_tex3Dlod, - IOP_tex3Dproj, - IOP_texCUBE, - IOP_texCUBEbias, - IOP_texCUBEgrad, - IOP_texCUBElod, - IOP_texCUBEproj, - IOP_transpose, - IOP_trunc, - IOP_unpack_s8s16, - IOP_unpack_s8s32, - IOP_unpack_u8u16, - IOP_unpack_u8u32, -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferLoad, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferStore, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkReadClock, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode_id, -#endif // ENABLE_SPIRV_CODEGEN - MOP_Append, - MOP_RestartStrip, - MOP_CalculateLevelOfDetail, - MOP_CalculateLevelOfDetailUnclamped, - MOP_GetDimensions, - MOP_Load, - MOP_Sample, - MOP_SampleBias, - MOP_SampleCmp, - MOP_SampleCmpBias, - MOP_SampleCmpGrad, - MOP_SampleCmpLevel, - MOP_SampleCmpLevelZero, - MOP_SampleGrad, - MOP_SampleLevel, - MOP_Gather, - MOP_GatherAlpha, - MOP_GatherBlue, - MOP_GatherCmp, - MOP_GatherCmpAlpha, - MOP_GatherCmpBlue, - MOP_GatherCmpGreen, - MOP_GatherCmpRed, - MOP_GatherGreen, - MOP_GatherRaw, - MOP_GatherRed, - MOP_GetSamplePosition, - MOP_Load2, - MOP_Load3, - MOP_Load4, - MOP_InterlockedAdd, - MOP_InterlockedAdd64, - MOP_InterlockedAnd, - MOP_InterlockedAnd64, - MOP_InterlockedCompareExchange, - MOP_InterlockedCompareExchange64, - MOP_InterlockedCompareExchangeFloatBitwise, - MOP_InterlockedCompareStore, - MOP_InterlockedCompareStore64, - MOP_InterlockedCompareStoreFloatBitwise, - MOP_InterlockedExchange, - MOP_InterlockedExchange64, - MOP_InterlockedExchangeFloat, - MOP_InterlockedMax, - MOP_InterlockedMax64, - MOP_InterlockedMin, - MOP_InterlockedMin64, - MOP_InterlockedOr, - MOP_InterlockedOr64, - MOP_InterlockedXor, - MOP_InterlockedXor64, - MOP_Store, - MOP_Store2, - MOP_Store3, - MOP_Store4, - MOP_DecrementCounter, - MOP_IncrementCounter, - MOP_Consume, - MOP_WriteSamplerFeedback, - MOP_WriteSamplerFeedbackBias, - MOP_WriteSamplerFeedbackGrad, - MOP_WriteSamplerFeedbackLevel, - MOP_Abort, - MOP_CandidateGeometryIndex, - MOP_CandidateInstanceContributionToHitGroupIndex, - MOP_CandidateInstanceID, - MOP_CandidateInstanceIndex, - MOP_CandidateObjectRayDirection, - MOP_CandidateObjectRayOrigin, - MOP_CandidateObjectToWorld3x4, - MOP_CandidateObjectToWorld4x3, - MOP_CandidatePrimitiveIndex, - MOP_CandidateProceduralPrimitiveNonOpaque, - MOP_CandidateTriangleBarycentrics, - MOP_CandidateTriangleFrontFace, - MOP_CandidateTriangleRayT, - 
MOP_CandidateType, - MOP_CandidateWorldToObject3x4, - MOP_CandidateWorldToObject4x3, - MOP_CommitNonOpaqueTriangleHit, - MOP_CommitProceduralPrimitiveHit, - MOP_CommittedGeometryIndex, - MOP_CommittedInstanceContributionToHitGroupIndex, - MOP_CommittedInstanceID, - MOP_CommittedInstanceIndex, - MOP_CommittedObjectRayDirection, - MOP_CommittedObjectRayOrigin, - MOP_CommittedObjectToWorld3x4, - MOP_CommittedObjectToWorld4x3, - MOP_CommittedPrimitiveIndex, - MOP_CommittedRayT, - MOP_CommittedStatus, - MOP_CommittedTriangleBarycentrics, - MOP_CommittedTriangleFrontFace, - MOP_CommittedWorldToObject3x4, - MOP_CommittedWorldToObject4x3, - MOP_Proceed, - MOP_RayFlags, - MOP_RayTMin, - MOP_TraceRayInline, - MOP_WorldRayDirection, - MOP_WorldRayOrigin, - MOP_Count, - MOP_FinishedCrossGroupSharing, - MOP_GetGroupNodeOutputRecords, - MOP_GetThreadNodeOutputRecords, - MOP_IsValid, - MOP_GroupIncrementOutputCount, - MOP_ThreadIncrementOutputCount, - MOP_OutputComplete, -#ifdef ENABLE_SPIRV_CODEGEN - MOP_SubpassLoad, -#endif // ENABLE_SPIRV_CODEGEN + IOP_AcceptHitAndEndSearch = 0, + IOP_AddUint64 = 1, + IOP_AllMemoryBarrier = 2, + IOP_AllMemoryBarrierWithGroupSync = 3, + IOP_AllocateRayQuery = 4, + IOP_Barrier = 5, + IOP_CallShader = 6, + IOP_CheckAccessFullyMapped = 7, + IOP_CreateResourceFromHeap = 8, + IOP_D3DCOLORtoUBYTE4 = 9, + IOP_DeviceMemoryBarrier = 10, + IOP_DeviceMemoryBarrierWithGroupSync = 11, + IOP_DispatchMesh = 12, + IOP_DispatchRaysDimensions = 13, + IOP_DispatchRaysIndex = 14, + IOP_EvaluateAttributeAtSample = 15, + IOP_EvaluateAttributeCentroid = 16, + IOP_EvaluateAttributeSnapped = 17, + IOP_GeometryIndex = 18, + IOP_GetAttributeAtVertex = 19, + IOP_GetRemainingRecursionLevels = 20, + IOP_GetRenderTargetSampleCount = 21, + IOP_GetRenderTargetSamplePosition = 22, + IOP_GroupMemoryBarrier = 23, + IOP_GroupMemoryBarrierWithGroupSync = 24, + IOP_HitKind = 25, + IOP_IgnoreHit = 26, + IOP_InstanceID = 27, + IOP_InstanceIndex = 28, + IOP_InterlockedAdd = 29, + IOP_InterlockedAnd = 30, + IOP_InterlockedCompareExchange = 31, + IOP_InterlockedCompareExchangeFloatBitwise = 32, + IOP_InterlockedCompareStore = 33, + IOP_InterlockedCompareStoreFloatBitwise = 34, + IOP_InterlockedExchange = 35, + IOP_InterlockedMax = 36, + IOP_InterlockedMin = 37, + IOP_InterlockedOr = 38, + IOP_InterlockedXor = 39, + IOP_IsHelperLane = 40, + IOP_NonUniformResourceIndex = 41, + IOP_ObjectRayDirection = 42, + IOP_ObjectRayOrigin = 43, + IOP_ObjectToWorld = 44, + IOP_ObjectToWorld3x4 = 45, + IOP_ObjectToWorld4x3 = 46, + IOP_PrimitiveIndex = 47, + IOP_Process2DQuadTessFactorsAvg = 48, + IOP_Process2DQuadTessFactorsMax = 49, + IOP_Process2DQuadTessFactorsMin = 50, + IOP_ProcessIsolineTessFactors = 51, + IOP_ProcessQuadTessFactorsAvg = 52, + IOP_ProcessQuadTessFactorsMax = 53, + IOP_ProcessQuadTessFactorsMin = 54, + IOP_ProcessTriTessFactorsAvg = 55, + IOP_ProcessTriTessFactorsMax = 56, + IOP_ProcessTriTessFactorsMin = 57, + IOP_QuadAll = 58, + IOP_QuadAny = 59, + IOP_QuadReadAcrossDiagonal = 60, + IOP_QuadReadAcrossX = 61, + IOP_QuadReadAcrossY = 62, + IOP_QuadReadLaneAt = 63, + IOP_RayFlags = 64, + IOP_RayTCurrent = 65, + IOP_RayTMin = 66, + IOP_ReportHit = 67, + IOP_SetMeshOutputCounts = 68, + IOP_TraceRay = 69, + IOP_WaveActiveAllEqual = 70, + IOP_WaveActiveAllTrue = 71, + IOP_WaveActiveAnyTrue = 72, + IOP_WaveActiveBallot = 73, + IOP_WaveActiveBitAnd = 74, + IOP_WaveActiveBitOr = 75, + IOP_WaveActiveBitXor = 76, + IOP_WaveActiveCountBits = 77, + IOP_WaveActiveMax = 78, + IOP_WaveActiveMin = 79, + 
IOP_WaveActiveProduct = 80, + IOP_WaveActiveSum = 81, + IOP_WaveGetLaneCount = 82, + IOP_WaveGetLaneIndex = 83, + IOP_WaveIsFirstLane = 84, + IOP_WaveMatch = 85, + IOP_WaveMultiPrefixBitAnd = 86, + IOP_WaveMultiPrefixBitOr = 87, + IOP_WaveMultiPrefixBitXor = 88, + IOP_WaveMultiPrefixCountBits = 89, + IOP_WaveMultiPrefixProduct = 90, + IOP_WaveMultiPrefixSum = 91, + IOP_WavePrefixCountBits = 92, + IOP_WavePrefixProduct = 93, + IOP_WavePrefixSum = 94, + IOP_WaveReadLaneAt = 95, + IOP_WaveReadLaneFirst = 96, + IOP_WorldRayDirection = 97, + IOP_WorldRayOrigin = 98, + IOP_WorldToObject = 99, + IOP_WorldToObject3x4 = 100, + IOP_WorldToObject4x3 = 101, + IOP_abort = 102, + IOP_abs = 103, + IOP_acos = 104, + IOP_all = 105, + IOP_and = 106, + IOP_any = 107, + IOP_asdouble = 108, + IOP_asfloat = 109, + IOP_asfloat16 = 110, + IOP_asin = 111, + IOP_asint = 112, + IOP_asint16 = 113, + IOP_asuint = 114, + IOP_asuint16 = 115, + IOP_atan = 116, + IOP_atan2 = 117, + IOP_ceil = 118, + IOP_clamp = 119, + IOP_clip = 120, + IOP_cos = 121, + IOP_cosh = 122, + IOP_countbits = 123, + IOP_cross = 124, + IOP_ddx = 125, + IOP_ddx_coarse = 126, + IOP_ddx_fine = 127, + IOP_ddy = 128, + IOP_ddy_coarse = 129, + IOP_ddy_fine = 130, + IOP_degrees = 131, + IOP_determinant = 132, + IOP_distance = 133, + IOP_dot = 134, + IOP_dot2add = 135, + IOP_dot4add_i8packed = 136, + IOP_dot4add_u8packed = 137, + IOP_dst = 138, + IOP_exp = 139, + IOP_exp2 = 140, + IOP_f16tof32 = 141, + IOP_f32tof16 = 142, + IOP_faceforward = 143, + IOP_firstbithigh = 144, + IOP_firstbitlow = 145, + IOP_floor = 146, + IOP_fma = 147, + IOP_fmod = 148, + IOP_frac = 149, + IOP_frexp = 150, + IOP_fwidth = 151, + IOP_isfinite = 152, + IOP_isinf = 153, + IOP_isnan = 154, + IOP_ldexp = 155, + IOP_length = 156, + IOP_lerp = 157, + IOP_lit = 158, + IOP_log = 159, + IOP_log10 = 160, + IOP_log2 = 161, + IOP_mad = 162, + IOP_max = 163, + IOP_min = 164, + IOP_modf = 165, + IOP_msad4 = 166, + IOP_mul = 167, + IOP_normalize = 168, + IOP_or = 169, + IOP_pack_clamp_s8 = 170, + IOP_pack_clamp_u8 = 171, + IOP_pack_s8 = 172, + IOP_pack_u8 = 173, + IOP_pow = 174, + IOP_printf = 175, + IOP_radians = 176, + IOP_rcp = 177, + IOP_reflect = 178, + IOP_refract = 179, + IOP_reversebits = 180, + IOP_round = 181, + IOP_rsqrt = 182, + IOP_saturate = 183, + IOP_select = 184, + IOP_sign = 185, + IOP_sin = 186, + IOP_sincos = 187, + IOP_sinh = 188, + IOP_smoothstep = 189, + IOP_source_mark = 190, + IOP_sqrt = 191, + IOP_step = 192, + IOP_tan = 193, + IOP_tanh = 194, + IOP_tex1D = 195, + IOP_tex1Dbias = 196, + IOP_tex1Dgrad = 197, + IOP_tex1Dlod = 198, + IOP_tex1Dproj = 199, + IOP_tex2D = 200, + IOP_tex2Dbias = 201, + IOP_tex2Dgrad = 202, + IOP_tex2Dlod = 203, + IOP_tex2Dproj = 204, + IOP_tex3D = 205, + IOP_tex3Dbias = 206, + IOP_tex3Dgrad = 207, + IOP_tex3Dlod = 208, + IOP_tex3Dproj = 209, + IOP_texCUBE = 210, + IOP_texCUBEbias = 211, + IOP_texCUBEgrad = 212, + IOP_texCUBElod = 213, + IOP_texCUBEproj = 214, + IOP_transpose = 215, + IOP_trunc = 216, + IOP_unpack_s8s16 = 217, + IOP_unpack_s8s32 = 218, + IOP_unpack_u8u16 = 219, + IOP_unpack_u8u32 = 220, + IOP_VkRawBufferLoad = 221, + IOP_VkRawBufferStore = 222, + IOP_VkReadClock = 223, + IOP_Vkext_execution_mode = 224, + IOP_Vkext_execution_mode_id = 225, + MOP_Append = 226, + MOP_RestartStrip = 227, + MOP_CalculateLevelOfDetail = 228, + MOP_CalculateLevelOfDetailUnclamped = 229, + MOP_GetDimensions = 230, + MOP_Load = 231, + MOP_Sample = 232, + MOP_SampleBias = 233, + MOP_SampleCmp = 234, + MOP_SampleCmpBias = 235, + MOP_SampleCmpGrad = 
236, + MOP_SampleCmpLevel = 237, + MOP_SampleCmpLevelZero = 238, + MOP_SampleGrad = 239, + MOP_SampleLevel = 240, + MOP_Gather = 241, + MOP_GatherAlpha = 242, + MOP_GatherBlue = 243, + MOP_GatherCmp = 244, + MOP_GatherCmpAlpha = 245, + MOP_GatherCmpBlue = 246, + MOP_GatherCmpGreen = 247, + MOP_GatherCmpRed = 248, + MOP_GatherGreen = 249, + MOP_GatherRaw = 250, + MOP_GatherRed = 251, + MOP_GetSamplePosition = 252, + MOP_Load2 = 253, + MOP_Load3 = 254, + MOP_Load4 = 255, + MOP_InterlockedAdd = 256, + MOP_InterlockedAdd64 = 257, + MOP_InterlockedAnd = 258, + MOP_InterlockedAnd64 = 259, + MOP_InterlockedCompareExchange = 260, + MOP_InterlockedCompareExchange64 = 261, + MOP_InterlockedCompareExchangeFloatBitwise = 262, + MOP_InterlockedCompareStore = 263, + MOP_InterlockedCompareStore64 = 264, + MOP_InterlockedCompareStoreFloatBitwise = 265, + MOP_InterlockedExchange = 266, + MOP_InterlockedExchange64 = 267, + MOP_InterlockedExchangeFloat = 268, + MOP_InterlockedMax = 269, + MOP_InterlockedMax64 = 270, + MOP_InterlockedMin = 271, + MOP_InterlockedMin64 = 272, + MOP_InterlockedOr = 273, + MOP_InterlockedOr64 = 274, + MOP_InterlockedXor = 275, + MOP_InterlockedXor64 = 276, + MOP_Store = 277, + MOP_Store2 = 278, + MOP_Store3 = 279, + MOP_Store4 = 280, + MOP_DecrementCounter = 281, + MOP_IncrementCounter = 282, + MOP_Consume = 283, + MOP_WriteSamplerFeedback = 284, + MOP_WriteSamplerFeedbackBias = 285, + MOP_WriteSamplerFeedbackGrad = 286, + MOP_WriteSamplerFeedbackLevel = 287, + MOP_Abort = 288, + MOP_CandidateGeometryIndex = 289, + MOP_CandidateInstanceContributionToHitGroupIndex = 290, + MOP_CandidateInstanceID = 291, + MOP_CandidateInstanceIndex = 292, + MOP_CandidateObjectRayDirection = 293, + MOP_CandidateObjectRayOrigin = 294, + MOP_CandidateObjectToWorld3x4 = 295, + MOP_CandidateObjectToWorld4x3 = 296, + MOP_CandidatePrimitiveIndex = 297, + MOP_CandidateProceduralPrimitiveNonOpaque = 298, + MOP_CandidateTriangleBarycentrics = 299, + MOP_CandidateTriangleFrontFace = 300, + MOP_CandidateTriangleRayT = 301, + MOP_CandidateType = 302, + MOP_CandidateWorldToObject3x4 = 303, + MOP_CandidateWorldToObject4x3 = 304, + MOP_CommitNonOpaqueTriangleHit = 305, + MOP_CommitProceduralPrimitiveHit = 306, + MOP_CommittedGeometryIndex = 307, + MOP_CommittedInstanceContributionToHitGroupIndex = 308, + MOP_CommittedInstanceID = 309, + MOP_CommittedInstanceIndex = 310, + MOP_CommittedObjectRayDirection = 311, + MOP_CommittedObjectRayOrigin = 312, + MOP_CommittedObjectToWorld3x4 = 313, + MOP_CommittedObjectToWorld4x3 = 314, + MOP_CommittedPrimitiveIndex = 315, + MOP_CommittedRayT = 316, + MOP_CommittedStatus = 317, + MOP_CommittedTriangleBarycentrics = 318, + MOP_CommittedTriangleFrontFace = 319, + MOP_CommittedWorldToObject3x4 = 320, + MOP_CommittedWorldToObject4x3 = 321, + MOP_Proceed = 322, + MOP_RayFlags = 323, + MOP_RayTMin = 324, + MOP_TraceRayInline = 325, + MOP_WorldRayDirection = 326, + MOP_WorldRayOrigin = 327, + MOP_Count = 328, + MOP_FinishedCrossGroupSharing = 329, + MOP_GetGroupNodeOutputRecords = 330, + MOP_GetThreadNodeOutputRecords = 331, + MOP_IsValid = 332, + MOP_GroupIncrementOutputCount = 333, + MOP_ThreadIncrementOutputCount = 334, + MOP_OutputComplete = 335, + MOP_SubpassLoad = 336, // unsigned - IOP_InterlockedUMax, - IOP_InterlockedUMin, - IOP_WaveActiveUMax, - IOP_WaveActiveUMin, - IOP_WaveActiveUProduct, - IOP_WaveActiveUSum, - IOP_WaveMultiPrefixUProduct, - IOP_WaveMultiPrefixUSum, - IOP_WavePrefixUProduct, - IOP_WavePrefixUSum, - IOP_uabs, - IOP_uclamp, - IOP_udot, - 
IOP_ufirstbithigh, - IOP_umad, - IOP_umax, - IOP_umin, - IOP_umul, - IOP_usign, - MOP_InterlockedUMax, - MOP_InterlockedUMin, - Num_Intrinsics, + IOP_InterlockedUMax = 337, + IOP_InterlockedUMin = 338, + IOP_WaveActiveUMax = 339, + IOP_WaveActiveUMin = 340, + IOP_WaveActiveUProduct = 341, + IOP_WaveActiveUSum = 342, + IOP_WaveMultiPrefixUProduct = 343, + IOP_WaveMultiPrefixUSum = 344, + IOP_WavePrefixUProduct = 345, + IOP_WavePrefixUSum = 346, + IOP_uabs = 347, + IOP_uclamp = 348, + IOP_udot = 349, + IOP_ufirstbithigh = 350, + IOP_umad = 351, + IOP_umax = 352, + IOP_umin = 353, + IOP_umul = 354, + IOP_usign = 355, + MOP_InterlockedUMax = 356, + MOP_InterlockedUMin = 357, + Num_Intrinsics = 358, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 9c3ad76b92..80d3af4147 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6156,7 +6156,6 @@ Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, } // SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, HLOperationLowerHelper &helper, @@ -6166,7 +6165,6 @@ Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic."); return nullptr; } -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, @@ -6511,7 +6509,6 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, -#ifdef ENABLE_SPIRV_CODEGEN {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic, @@ -6522,7 +6519,6 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream}, {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream}, {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, @@ -6750,11 +6746,9 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, DXIL::OpCode::OutputComplete}, -// SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN + // SPIRV change starts {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends // Manually added part. diff --git a/utils/hct/CMakeLists.txt b/utils/hct/CMakeLists.txt new file mode 100644 index 0000000000..41e6b494e6 --- /dev/null +++ b/utils/hct/CMakeLists.txt @@ -0,0 +1,3 @@ +# generate hlsl_intrinsic_opcodes.json to preserve high level intrinsic opcodes +# This uses CODE_TAG because the file exists in the source tree. 
+add_hlsl_hctgen(HlslIntrinsicOpcodes OUTPUT hlsl_intrinsic_opcodes.json CODE_TAG) diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 1c3fd0f717..6f4611db32 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8254,6 +8254,8 @@ def __init__( self.vulkanSpecific = ns.startswith( "Vk" ) # Vulkan specific intrinsic - SPIRV change + self.opcode = None # high-level opcode assigned later + self.unsigned_opcode = None # unsigned high-level opcode if appicable class db_hlsl_namespace(object): @@ -8295,11 +8297,10 @@ def __init__( self.template_id_idx = template_id_idx # Template ID numeric value self.component_id_idx = component_id_idx # Component ID numeric value - class db_hlsl(object): "A database of HLSL language data" - def __init__(self, intrinsic_defs): + def __init__(self, intrinsic_defs, opcode_data): self.base_types = { "bool": "LICOMPTYPE_BOOL", "int": "LICOMPTYPE_INT", @@ -8372,6 +8373,13 @@ def __init__(self, intrinsic_defs): self.populate_attributes() self.opcode_namespace = "hlsl::IntrinsicOp" + # Populate opcode data for HLSL intrinsics. + self.opcode_data = opcode_data + # If opcode data is empty, create the default structure. + if not self.opcode_data: + self.opcode_data["IntrinsicOpCodes"] = {"Num_Intrinsics": 0} + self.assign_opcodes() + def create_namespaces(self): last_ns = None self.namespaces = {} @@ -8898,6 +8906,29 @@ def add_attr_arg(title_name, scope, args, doc): ) self.attributes = attributes + # Iterate through all intrinsics, assigning opcodes to each one. + # This uses the opcode_data to preserve already-assigned opcodes. + def assign_opcodes(self): + "Assign opcodes to the intrinsics." + IntrinsicOpDict = self.opcode_data["IntrinsicOpCodes"] + Num_Intrinsics = self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] + + def add_intrinsic(name): + nonlocal Num_Intrinsics + opcode = IntrinsicOpDict.setdefault(name, Num_Intrinsics) + if opcode == Num_Intrinsics: + Num_Intrinsics += 1 + return opcode + + sorted_intrinsics = sorted(self.intrinsics, key=lambda x: x.key) + for i in sorted_intrinsics: + i.opcode = add_intrinsic(i.enum_name) + for i in sorted_intrinsics: + if i.unsigned_op == "": + continue + i.unsigned_opcode = add_intrinsic(i.unsigned_op) + self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] = Num_Intrinsics + if __name__ == "__main__": db = db_dxil() diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 353f8f9634..2a0359d274 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -18,15 +18,36 @@ def get_db_dxil(): return g_db_dxil -g_db_hlsl = None +# opcode data contains fixed opcode assignments for HLSL intrinsics. +g_hlsl_opcode_data = None + + +def get_hlsl_opcode_data(): + global g_hlsl_opcode_data + if g_hlsl_opcode_data is None: + # Load the intrinsic opcodes from the JSON file. 
+ json_filepath = os.path.join( + os.path.dirname(__file__), "hlsl_intrinsic_opcodes.json" + ) + try: + with open(json_filepath, "r") as file: + g_hlsl_opcode_data = json.load(file) + except FileNotFoundError: + print(f"File not found: {json_filepath}") + except json.JSONDecodeError as e: + print(f"Error decoding JSON from {json_filepath}: {e}") + if not g_hlsl_opcode_data: + g_hlsl_opcode_data = {} + return g_hlsl_opcode_data +g_db_hlsl = None def get_db_hlsl(): global g_db_hlsl if g_db_hlsl is None: thisdir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(thisdir, "gen_intrin_main.txt"), "r") as f: - g_db_hlsl = db_hlsl(f) + g_db_hlsl = db_hlsl(f, get_hlsl_opcode_data()) return g_db_hlsl @@ -1055,22 +1076,22 @@ def wrap_with_ifdef_if_vulkan_specific(intrinsic, text): def enum_hlsl_intrinsics(): db = get_db_hlsl() result = "" - enumed = [] + enumed = set() for i in sorted(db.intrinsics, key=lambda x: x.key): if i.enum_name not in enumed: - enumerant = " %s,\n" % (i.enum_name) - result += wrap_with_ifdef_if_vulkan_specific(i, enumerant) # SPIRV Change - enumed.append(i.enum_name) + result += " %s = %d,\n" % (i.enum_name, i.opcode) + enumed.add(i.enum_name) # unsigned result += " // unsigned\n" for i in sorted(db.intrinsics, key=lambda x: x.key): if i.unsigned_op != "": if i.unsigned_op not in enumed: - result += " %s,\n" % (i.unsigned_op) - enumed.append(i.unsigned_op) + result += " %s = %d,\n" % (i.unsigned_op, i.unsigned_opcode) + enumed.add(i.unsigned_op) - result += " Num_Intrinsics,\n" + Num_Intrinsics = get_hlsl_opcode_data()["IntrinsicOpCodes"]["Num_Intrinsics"] + result += " Num_Intrinsics = %d,\n" % (Num_Intrinsics) return result diff --git a/utils/hct/hctgen.py b/utils/hct/hctgen.py index dbb7e3a745..1421fbfad5 100755 --- a/utils/hct/hctgen.py +++ b/utils/hct/hctgen.py @@ -2,6 +2,7 @@ import argparse from hctdb_instrhelp import * from hctdb import * +import json import sys import os import CodeTags @@ -28,6 +29,7 @@ "DxilCounters", "DxilMetadata", "RDAT_LibraryTypes", + "HlslIntrinsicOpcodes", ], ) parser.add_argument("--output", required=True) @@ -232,6 +234,14 @@ def writeDxilPIXPasses(args): return 0 +def writeHlslIntrinsicOpcodes(args): + out = openOutput(args) + # get_db_hlsl() initializes the hlsl intrinsic database and opcode_data. 
+ get_db_hlsl() + json.dump(get_hlsl_opcode_data(), out, indent=2) + out.write("\n") + return 0 + args = parser.parse_args() if args.force_lf and args.force_crlf: eprint("--force-lf and --force-crlf are mutually exclusive, only pass one") diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json new file mode 100644 index 0000000000..48a0b74c17 --- /dev/null +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -0,0 +1,363 @@ +{ + "IntrinsicOpCodes": { + "Num_Intrinsics": 358, + "IOP_AcceptHitAndEndSearch": 0, + "IOP_AddUint64": 1, + "IOP_AllMemoryBarrier": 2, + "IOP_AllMemoryBarrierWithGroupSync": 3, + "IOP_AllocateRayQuery": 4, + "IOP_Barrier": 5, + "IOP_CallShader": 6, + "IOP_CheckAccessFullyMapped": 7, + "IOP_CreateResourceFromHeap": 8, + "IOP_D3DCOLORtoUBYTE4": 9, + "IOP_DeviceMemoryBarrier": 10, + "IOP_DeviceMemoryBarrierWithGroupSync": 11, + "IOP_DispatchMesh": 12, + "IOP_DispatchRaysDimensions": 13, + "IOP_DispatchRaysIndex": 14, + "IOP_EvaluateAttributeAtSample": 15, + "IOP_EvaluateAttributeCentroid": 16, + "IOP_EvaluateAttributeSnapped": 17, + "IOP_GeometryIndex": 18, + "IOP_GetAttributeAtVertex": 19, + "IOP_GetRemainingRecursionLevels": 20, + "IOP_GetRenderTargetSampleCount": 21, + "IOP_GetRenderTargetSamplePosition": 22, + "IOP_GroupMemoryBarrier": 23, + "IOP_GroupMemoryBarrierWithGroupSync": 24, + "IOP_HitKind": 25, + "IOP_IgnoreHit": 26, + "IOP_InstanceID": 27, + "IOP_InstanceIndex": 28, + "IOP_InterlockedAdd": 29, + "IOP_InterlockedAnd": 30, + "IOP_InterlockedCompareExchange": 31, + "IOP_InterlockedCompareExchangeFloatBitwise": 32, + "IOP_InterlockedCompareStore": 33, + "IOP_InterlockedCompareStoreFloatBitwise": 34, + "IOP_InterlockedExchange": 35, + "IOP_InterlockedMax": 36, + "IOP_InterlockedMin": 37, + "IOP_InterlockedOr": 38, + "IOP_InterlockedXor": 39, + "IOP_IsHelperLane": 40, + "IOP_NonUniformResourceIndex": 41, + "IOP_ObjectRayDirection": 42, + "IOP_ObjectRayOrigin": 43, + "IOP_ObjectToWorld": 44, + "IOP_ObjectToWorld3x4": 45, + "IOP_ObjectToWorld4x3": 46, + "IOP_PrimitiveIndex": 47, + "IOP_Process2DQuadTessFactorsAvg": 48, + "IOP_Process2DQuadTessFactorsMax": 49, + "IOP_Process2DQuadTessFactorsMin": 50, + "IOP_ProcessIsolineTessFactors": 51, + "IOP_ProcessQuadTessFactorsAvg": 52, + "IOP_ProcessQuadTessFactorsMax": 53, + "IOP_ProcessQuadTessFactorsMin": 54, + "IOP_ProcessTriTessFactorsAvg": 55, + "IOP_ProcessTriTessFactorsMax": 56, + "IOP_ProcessTriTessFactorsMin": 57, + "IOP_QuadAll": 58, + "IOP_QuadAny": 59, + "IOP_QuadReadAcrossDiagonal": 60, + "IOP_QuadReadAcrossX": 61, + "IOP_QuadReadAcrossY": 62, + "IOP_QuadReadLaneAt": 63, + "IOP_RayFlags": 64, + "IOP_RayTCurrent": 65, + "IOP_RayTMin": 66, + "IOP_ReportHit": 67, + "IOP_SetMeshOutputCounts": 68, + "IOP_TraceRay": 69, + "IOP_WaveActiveAllEqual": 70, + "IOP_WaveActiveAllTrue": 71, + "IOP_WaveActiveAnyTrue": 72, + "IOP_WaveActiveBallot": 73, + "IOP_WaveActiveBitAnd": 74, + "IOP_WaveActiveBitOr": 75, + "IOP_WaveActiveBitXor": 76, + "IOP_WaveActiveCountBits": 77, + "IOP_WaveActiveMax": 78, + "IOP_WaveActiveMin": 79, + "IOP_WaveActiveProduct": 80, + "IOP_WaveActiveSum": 81, + "IOP_WaveGetLaneCount": 82, + "IOP_WaveGetLaneIndex": 83, + "IOP_WaveIsFirstLane": 84, + "IOP_WaveMatch": 85, + "IOP_WaveMultiPrefixBitAnd": 86, + "IOP_WaveMultiPrefixBitOr": 87, + "IOP_WaveMultiPrefixBitXor": 88, + "IOP_WaveMultiPrefixCountBits": 89, + "IOP_WaveMultiPrefixProduct": 90, + "IOP_WaveMultiPrefixSum": 91, + "IOP_WavePrefixCountBits": 92, + "IOP_WavePrefixProduct": 93, + "IOP_WavePrefixSum": 94, + 
"IOP_WaveReadLaneAt": 95, + "IOP_WaveReadLaneFirst": 96, + "IOP_WorldRayDirection": 97, + "IOP_WorldRayOrigin": 98, + "IOP_WorldToObject": 99, + "IOP_WorldToObject3x4": 100, + "IOP_WorldToObject4x3": 101, + "IOP_abort": 102, + "IOP_abs": 103, + "IOP_acos": 104, + "IOP_all": 105, + "IOP_and": 106, + "IOP_any": 107, + "IOP_asdouble": 108, + "IOP_asfloat": 109, + "IOP_asfloat16": 110, + "IOP_asin": 111, + "IOP_asint": 112, + "IOP_asint16": 113, + "IOP_asuint": 114, + "IOP_asuint16": 115, + "IOP_atan": 116, + "IOP_atan2": 117, + "IOP_ceil": 118, + "IOP_clamp": 119, + "IOP_clip": 120, + "IOP_cos": 121, + "IOP_cosh": 122, + "IOP_countbits": 123, + "IOP_cross": 124, + "IOP_ddx": 125, + "IOP_ddx_coarse": 126, + "IOP_ddx_fine": 127, + "IOP_ddy": 128, + "IOP_ddy_coarse": 129, + "IOP_ddy_fine": 130, + "IOP_degrees": 131, + "IOP_determinant": 132, + "IOP_distance": 133, + "IOP_dot": 134, + "IOP_dot2add": 135, + "IOP_dot4add_i8packed": 136, + "IOP_dot4add_u8packed": 137, + "IOP_dst": 138, + "IOP_exp": 139, + "IOP_exp2": 140, + "IOP_f16tof32": 141, + "IOP_f32tof16": 142, + "IOP_faceforward": 143, + "IOP_firstbithigh": 144, + "IOP_firstbitlow": 145, + "IOP_floor": 146, + "IOP_fma": 147, + "IOP_fmod": 148, + "IOP_frac": 149, + "IOP_frexp": 150, + "IOP_fwidth": 151, + "IOP_isfinite": 152, + "IOP_isinf": 153, + "IOP_isnan": 154, + "IOP_ldexp": 155, + "IOP_length": 156, + "IOP_lerp": 157, + "IOP_lit": 158, + "IOP_log": 159, + "IOP_log10": 160, + "IOP_log2": 161, + "IOP_mad": 162, + "IOP_max": 163, + "IOP_min": 164, + "IOP_modf": 165, + "IOP_msad4": 166, + "IOP_mul": 167, + "IOP_normalize": 168, + "IOP_or": 169, + "IOP_pack_clamp_s8": 170, + "IOP_pack_clamp_u8": 171, + "IOP_pack_s8": 172, + "IOP_pack_u8": 173, + "IOP_pow": 174, + "IOP_printf": 175, + "IOP_radians": 176, + "IOP_rcp": 177, + "IOP_reflect": 178, + "IOP_refract": 179, + "IOP_reversebits": 180, + "IOP_round": 181, + "IOP_rsqrt": 182, + "IOP_saturate": 183, + "IOP_select": 184, + "IOP_sign": 185, + "IOP_sin": 186, + "IOP_sincos": 187, + "IOP_sinh": 188, + "IOP_smoothstep": 189, + "IOP_source_mark": 190, + "IOP_sqrt": 191, + "IOP_step": 192, + "IOP_tan": 193, + "IOP_tanh": 194, + "IOP_tex1D": 195, + "IOP_tex1Dbias": 196, + "IOP_tex1Dgrad": 197, + "IOP_tex1Dlod": 198, + "IOP_tex1Dproj": 199, + "IOP_tex2D": 200, + "IOP_tex2Dbias": 201, + "IOP_tex2Dgrad": 202, + "IOP_tex2Dlod": 203, + "IOP_tex2Dproj": 204, + "IOP_tex3D": 205, + "IOP_tex3Dbias": 206, + "IOP_tex3Dgrad": 207, + "IOP_tex3Dlod": 208, + "IOP_tex3Dproj": 209, + "IOP_texCUBE": 210, + "IOP_texCUBEbias": 211, + "IOP_texCUBEgrad": 212, + "IOP_texCUBElod": 213, + "IOP_texCUBEproj": 214, + "IOP_transpose": 215, + "IOP_trunc": 216, + "IOP_unpack_s8s16": 217, + "IOP_unpack_s8s32": 218, + "IOP_unpack_u8u16": 219, + "IOP_unpack_u8u32": 220, + "IOP_VkRawBufferLoad": 221, + "IOP_VkRawBufferStore": 222, + "IOP_VkReadClock": 223, + "IOP_Vkext_execution_mode": 224, + "IOP_Vkext_execution_mode_id": 225, + "MOP_Append": 226, + "MOP_RestartStrip": 227, + "MOP_CalculateLevelOfDetail": 228, + "MOP_CalculateLevelOfDetailUnclamped": 229, + "MOP_GetDimensions": 230, + "MOP_Load": 231, + "MOP_Sample": 232, + "MOP_SampleBias": 233, + "MOP_SampleCmp": 234, + "MOP_SampleCmpBias": 235, + "MOP_SampleCmpGrad": 236, + "MOP_SampleCmpLevel": 237, + "MOP_SampleCmpLevelZero": 238, + "MOP_SampleGrad": 239, + "MOP_SampleLevel": 240, + "MOP_Gather": 241, + "MOP_GatherAlpha": 242, + "MOP_GatherBlue": 243, + "MOP_GatherCmp": 244, + "MOP_GatherCmpAlpha": 245, + "MOP_GatherCmpBlue": 246, + "MOP_GatherCmpGreen": 247, + 
"MOP_GatherCmpRed": 248, + "MOP_GatherGreen": 249, + "MOP_GatherRaw": 250, + "MOP_GatherRed": 251, + "MOP_GetSamplePosition": 252, + "MOP_Load2": 253, + "MOP_Load3": 254, + "MOP_Load4": 255, + "MOP_InterlockedAdd": 256, + "MOP_InterlockedAdd64": 257, + "MOP_InterlockedAnd": 258, + "MOP_InterlockedAnd64": 259, + "MOP_InterlockedCompareExchange": 260, + "MOP_InterlockedCompareExchange64": 261, + "MOP_InterlockedCompareExchangeFloatBitwise": 262, + "MOP_InterlockedCompareStore": 263, + "MOP_InterlockedCompareStore64": 264, + "MOP_InterlockedCompareStoreFloatBitwise": 265, + "MOP_InterlockedExchange": 266, + "MOP_InterlockedExchange64": 267, + "MOP_InterlockedExchangeFloat": 268, + "MOP_InterlockedMax": 269, + "MOP_InterlockedMax64": 270, + "MOP_InterlockedMin": 271, + "MOP_InterlockedMin64": 272, + "MOP_InterlockedOr": 273, + "MOP_InterlockedOr64": 274, + "MOP_InterlockedXor": 275, + "MOP_InterlockedXor64": 276, + "MOP_Store": 277, + "MOP_Store2": 278, + "MOP_Store3": 279, + "MOP_Store4": 280, + "MOP_DecrementCounter": 281, + "MOP_IncrementCounter": 282, + "MOP_Consume": 283, + "MOP_WriteSamplerFeedback": 284, + "MOP_WriteSamplerFeedbackBias": 285, + "MOP_WriteSamplerFeedbackGrad": 286, + "MOP_WriteSamplerFeedbackLevel": 287, + "MOP_Abort": 288, + "MOP_CandidateGeometryIndex": 289, + "MOP_CandidateInstanceContributionToHitGroupIndex": 290, + "MOP_CandidateInstanceID": 291, + "MOP_CandidateInstanceIndex": 292, + "MOP_CandidateObjectRayDirection": 293, + "MOP_CandidateObjectRayOrigin": 294, + "MOP_CandidateObjectToWorld3x4": 295, + "MOP_CandidateObjectToWorld4x3": 296, + "MOP_CandidatePrimitiveIndex": 297, + "MOP_CandidateProceduralPrimitiveNonOpaque": 298, + "MOP_CandidateTriangleBarycentrics": 299, + "MOP_CandidateTriangleFrontFace": 300, + "MOP_CandidateTriangleRayT": 301, + "MOP_CandidateType": 302, + "MOP_CandidateWorldToObject3x4": 303, + "MOP_CandidateWorldToObject4x3": 304, + "MOP_CommitNonOpaqueTriangleHit": 305, + "MOP_CommitProceduralPrimitiveHit": 306, + "MOP_CommittedGeometryIndex": 307, + "MOP_CommittedInstanceContributionToHitGroupIndex": 308, + "MOP_CommittedInstanceID": 309, + "MOP_CommittedInstanceIndex": 310, + "MOP_CommittedObjectRayDirection": 311, + "MOP_CommittedObjectRayOrigin": 312, + "MOP_CommittedObjectToWorld3x4": 313, + "MOP_CommittedObjectToWorld4x3": 314, + "MOP_CommittedPrimitiveIndex": 315, + "MOP_CommittedRayT": 316, + "MOP_CommittedStatus": 317, + "MOP_CommittedTriangleBarycentrics": 318, + "MOP_CommittedTriangleFrontFace": 319, + "MOP_CommittedWorldToObject3x4": 320, + "MOP_CommittedWorldToObject4x3": 321, + "MOP_Proceed": 322, + "MOP_RayFlags": 323, + "MOP_RayTMin": 324, + "MOP_TraceRayInline": 325, + "MOP_WorldRayDirection": 326, + "MOP_WorldRayOrigin": 327, + "MOP_Count": 328, + "MOP_FinishedCrossGroupSharing": 329, + "MOP_GetGroupNodeOutputRecords": 330, + "MOP_GetThreadNodeOutputRecords": 331, + "MOP_IsValid": 332, + "MOP_GroupIncrementOutputCount": 333, + "MOP_ThreadIncrementOutputCount": 334, + "MOP_OutputComplete": 335, + "MOP_SubpassLoad": 336, + "IOP_InterlockedUMax": 337, + "IOP_InterlockedUMin": 338, + "IOP_WaveActiveUMax": 339, + "IOP_WaveActiveUMin": 340, + "IOP_WaveActiveUProduct": 341, + "IOP_WaveActiveUSum": 342, + "IOP_WaveMultiPrefixUProduct": 343, + "IOP_WaveMultiPrefixUSum": 344, + "IOP_WavePrefixUProduct": 345, + "IOP_WavePrefixUSum": 346, + "IOP_uabs": 347, + "IOP_uclamp": 348, + "IOP_udot": 349, + "IOP_ufirstbithigh": 350, + "IOP_umad": 351, + "IOP_umax": 352, + "IOP_umin": 353, + "IOP_umul": 354, + "IOP_usign": 355, + 
"MOP_InterlockedUMax": 356, + "MOP_InterlockedUMin": 357 + } +} From 9e8a698deed37116e5e55cebd0d725c5c2be5e4c Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 20 Mar 2025 10:10:48 -0700 Subject: [PATCH 40/88] Lower RayQuery constructor to allocateRayQuery2 (#7205) This PR connects the front end change to the back end change, by taking the existing rayquery constructor translation, and augmenting it so that allocaterayquery2 can be emitted as an opcode if there are 2 template arguments. It is independent of the shader model. If 2 template args are detected, and the 2nd template argument has a non-zero value, it just emits allocateRayQuery2. A test was added to make sure that when targeting shader model 6.9, using 2 template args where the 2nd arg is non-zero in a rayquery declaration will produce an allocateRayQuery2 opcode. Fixes [#7136](https://github.com/microsoft/DirectXShaderCompiler/issues/7136) --- include/dxc/HLSL/HLOperations.h | 4 + lib/DXIL/DxilShaderFlags.cpp | 1 + lib/HLSL/HLOperationLower.cpp | 20 ++- .../lib/CodeGen/CGHLSLMSFinishCodeGen.cpp | 23 +++- .../objects/RayQuery/allocateRayQuery2.hlsl | 23 ++++ .../Passes/DxilGen/LowerAllocateRayQuery2.ll | 118 ++++++++++++++++++ utils/hct/gen_intrin_main.txt | 4 +- 7 files changed, 183 insertions(+), 10 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 1ccb7f04a2..f87d324baf 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -398,6 +398,10 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3; const unsigned kTraceRayRayDescOpIdx = 7; const unsigned kTraceRayPayLoadOpIdx = 8; +// AllocateRayQuery +const unsigned kAllocateRayQueryRayFlagsIdx = 1; +const unsigned kAllocateRayQueryRayQueryFlagsIdx = 2; + // CallShader. 
const unsigned kCallShaderPayloadOpIdx = 2; diff --git a/lib/DXIL/DxilShaderFlags.cpp b/lib/DXIL/DxilShaderFlags.cpp index 7d0799dc64..993038aaf1 100644 --- a/lib/DXIL/DxilShaderFlags.cpp +++ b/lib/DXIL/DxilShaderFlags.cpp @@ -637,6 +637,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F, hasViewID = true; break; case DXIL::OpCode::AllocateRayQuery: + case DXIL::OpCode::AllocateRayQuery2: case DXIL::OpCode::GeometryIndex: hasRaytracingTier1_1 = true; break; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 80d3af4147..96ebda43ac 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -5670,7 +5670,24 @@ Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *refArgs[] = {nullptr, CI->getOperand(1)}; + // upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg + DXASSERT(CI->getNumArgOperands() == 3, + "hlopcode for allocaterayquery always expects 3 arguments"); + + llvm::Value *Arg = + CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx); + llvm::ConstantInt *ConstVal = llvm::dyn_cast(Arg); + DXASSERT(ConstVal, + "2nd argument to allocaterayquery must always be a constant value"); + if (ConstVal->getValue().getZExtValue() != 0) { + Value *refArgs[3] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx), + CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)}; + opcode = OP::OpCode::AllocateRayQuery2; + return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); + } + Value *refArgs[2] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)}; return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); } @@ -5679,7 +5696,6 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *opArg = hlslOP->GetU32Const(static_cast(opcode)); Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 16f268f102..532ec01458 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -2795,10 +2795,12 @@ unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size, } // Translate RayQuery constructor. 
From: -// %call = call %"RayQuery" @(%"RayQuery" %ptr) +// %call = call %"RayQuery>" +// @(%"RayQuery" %ptr) // To: -// i32 %handle = AllocateRayQuery(i32 , i32 -// %flags) %gep = GEP %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 +// i32 %handle = AllocateRayQuery2(i32 , i32 +// %flags, i32 %constrayqueryflags <0 if not given>) %gep = GEP +// %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 // %handle ; and replace uses of %call with %ptr void TranslateRayQueryConstructor(HLModule &HLM) { llvm::Module &M = *HLM.GetModule(); @@ -2822,9 +2824,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) { llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext()); llvm::ConstantInt *i32Zero = llvm::ConstantInt::get(i32Ty, (uint64_t)0, false); + + // the third argument will default to 0 if the rayquery constructor doesn't + // have a second template argument llvm::FunctionType *funcTy = - llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false); + llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty, i32Ty}, false); unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery; + llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false); Function *opFunc = GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode); @@ -2848,8 +2854,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) { llvm::IRBuilder<> Builder(CI); llvm::Value *rayFlags = Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral()); - llvm::Value *Call = - Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName()); + // the default val of 0 will be assigned if there is no 2nd template arg + llvm::Value *rayQueryFlags = + Builder.getInt32(SA->GetTemplateArgAnnotation(1).GetIntegral()); + + llvm::Value *Call = Builder.CreateCall( + opFunc, {opVal, rayFlags, rayQueryFlags}, pThis->getName()); + llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero}); Builder.CreateStore(Call, GEP); CI->replaceAllUsesWith(pThis); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl new file mode 100644 index 0000000000..de79a2f481 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl @@ -0,0 +1,23 @@ +// REQUIRES: dxil-1-9 +// RUN: %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -T lib_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + +// RUN: %dxc -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -T vs_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + + +RaytracingAccelerationStructure RTAS; +[shader("vertex")] +void main(RayDesc rayDesc : RAYDESC) { + + // CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1) + RayQuery rayQuery1; + + rayQuery1.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0) + RayQuery rayQuery2; + rayQuery2.TraceRayInline(RTAS, 0, 2, rayDesc); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll new file mode 100644 index 0000000000..ab86452b17 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll @@ -0,0 +1,118 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; generated the IR with: +; ExtractIRForPassTest.py -p dxilgen -o LowerAllocateRayQuery2.ll 
tools\clang\test\CodeGenDXIL\hlsl\objects\RayQuery\allocateRayQuery2.hlsl -- -T vs_6_9 +; Importantly, extraction took place with spirv code-gen enabled + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<1024, 1>" = type { i32 } +%"class.RayQuery<1, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +define void @main(<3 x float>, float, <3 x float>, float) #0 { +entry: + ; CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + %rayQuery12 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1), !dbg !42 ; line:15 col:79 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !46 ; line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !46 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !46 ; line:17 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery12, %dx.types.Handle %6, i32 1024, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !46 ; line:17 col:3 + + ; CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + %rayQuery23 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0), !dbg !47 ; line:21 col:35 + %7 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !48 ; line:22 col:3 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %7), !dbg !48 ; line:22 col:3 + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !48 ; line:22 col:3 + call 
void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery23, %dx.types.Handle %9, i32 0, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !48 ; line:22 col:3 + ret void, !dbg !49 ; line:23 col:1 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !21} +!dx.entryPoints = !{!34} +!dx.fnprops = !{!39} +!dx.options = !{!40, !41} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4853 (lowerOMM, ca5df957eb33-dirty)"} +!3 = !{i32 1, i32 9} +!4 = !{!"vs", i32 6, i32 9} +!5 = !{i32 0, %struct.RayDesc undef, !6, %"class.RayQuery<1024, 1>" undef, !11, %"class.RayQuery<1, 0>" undef, !17} +!6 = !{i32 32, !7, !8, !9, !10} +!7 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!8 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!9 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!10 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!11 = !{i32 4, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!13 = !{i32 0, !14} +!14 = !{!15, !16} +!15 = !{i32 1, i64 1024} +!16 = !{i32 1, i64 1} +!17 = !{i32 4, !12, !18} +!18 = !{i32 0, !19} +!19 = !{!16, !20} +!20 = !{i32 1, i64 0} +!21 = !{i32 1, void (<3 x float>, float, <3 x float>, float)* @main, !22} +!22 = !{!23, !25, !28, !30, !32} +!23 = !{i32 0, !24, !24} +!24 = !{} +!25 = !{i32 0, !26, !27} +!26 = !{i32 4, !"RAYDESC", i32 7, i32 9} +!27 = !{i32 0} +!28 = !{i32 0, !26, !29} +!29 = !{i32 1} +!30 = !{i32 0, !26, !31} +!31 = !{i32 2} +!32 = !{i32 0, !26, !33} +!33 = !{i32 3} +!34 = !{void (<3 x float>, float, <3 x float>, float)* @main, !"main", null, !35, null} +!35 = !{!36, null, null, null} +!36 = !{!37} +!37 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !38} +!38 = !{i32 0, i32 4} +!39 = !{void (<3 x float>, float, <3 x float>, float)* @main, i32 1} +!40 = !{i32 -2147483584} +!41 = !{i32 -1} +!42 = !DILocation(line: 15, column: 79, scope: !43) +!43 = !DISubprogram(name: "main", scope: !44, file: !44, line: 11, type: !45, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void (<3 x float>, float, <3 x float>, float)* @main) +!44 = !DIFile(filename: "tools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CRayQuery\5CallocateRayQuery2.hlsl", directory: "") +!45 = !DISubroutineType(types: !24) +!46 = !DILocation(line: 17, column: 3, scope: !43) +!47 = !DILocation(line: 21, column: 35, scope: !43) +!48 = !DILocation(line: 22, column: 3, scope: !43) +!49 = !DILocation(line: 23, column: 1, scope: !43) diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 7f7637b230..51ea6b3176 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -361,8 +361,8 @@ void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in // Return true if the current lane is a helper lane bool [[ro]] IsHelperLane(); -// HL Op for allocating ray query object that 
default constructor uses -uint [[hidden]] AllocateRayQuery(in uint flags); +// HL Op for allocating ray query object +uint [[hidden]] AllocateRayQuery(in uint flags, in uint rayqueryflags); resource [[hidden]] CreateResourceFromHeap(in uint index); From 8b3fae2f23b946eb47429b3ee432885c2b63301b Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 20 Mar 2025 16:28:41 -0700 Subject: [PATCH 41/88] Use Wide String variants explicitly for Windows API calls (#7235) This PR changes some code in ExecutionTests.cpp to use the wide string variants of Windows API calls explicitly. This is because some internal builds will get confused about which overload to resolve the GetModuleHandle function to. By being explicit, this should eliminate the error that an arg can't be converted to LPCWSTR. --- tools/clang/unittests/HLSLExec/ExecutionTest.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 7066247883..91b42f6b79 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -820,10 +820,10 @@ class ExecutionTest { return false; } - if (GetModuleHandle("d3d10warp.dll") != NULL) { - CHAR szFullModuleFilePath[MAX_PATH] = ""; - GetModuleFileName(GetModuleHandle("d3d10warp.dll"), - szFullModuleFilePath, sizeof(szFullModuleFilePath)); + if (GetModuleHandleW(L"d3d10warp.dll") != NULL) { + WCHAR szFullModuleFilePath[MAX_PATH] = L""; + GetModuleFileNameW(GetModuleHandleW(L"d3d10warp.dll"), + szFullModuleFilePath, sizeof(szFullModuleFilePath)); WEX::Logging::Log::Comment(WEX::Common::String().Format( L"WARP driver loaded from: %S", szFullModuleFilePath)); } From 60e6c76fbad97dd0137385289498dc76ffe7b611 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 20 Mar 2025 19:10:13 -0700 Subject: [PATCH 42/88] Add constraint to test that requires spirv support (#7241) This PR adds a // REQUIRES: spirv line to the top of a test that uses spirv. This prevents failures in dev environments that don't have spirv enabled. --- .../test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl index 4fcce749d7..ece7e3f2f4 100644 --- a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl +++ b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl @@ -1,3 +1,4 @@ +// REQUIRES: spirv // RUN: %dxc -T ps_6_0 -E main -verify -spirv %s struct S From b646ad39c722a43b39d2df4a80d5f118d85a8685 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 21 Mar 2025 13:27:18 -0400 Subject: [PATCH 43/88] [SPIRV] Update submodules and fix test (#7243) Updates the submodules. One test is updated because spirv-opt does not common the load of a sampler anymore to avoid using a value from a different basic block. 
---
 external/SPIRV-Tools | 2 +-
 .../vk.binding.global-struct-of-resource.and.array.hlsl | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools
index f289d047f4..ada1771a9f 160000
--- a/external/SPIRV-Tools
+++ b/external/SPIRV-Tools
@@ -1 +1 @@
-Subproject commit f289d047f49fb60488301ec62bafab85573668cc
+Subproject commit ada1771a9f7a125573aa94fe551fdc44b45769bd
diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl
index 9d226eb962..526bfc002c 100644
--- a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl
@@ -27,6 +27,7 @@ float4 main() : SV_Target
 // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]]
   return Textures[0].Sample(TheStruct.Sampler, float2(0, 0))
 // CHECK: [[tex:%[0-9]+]] = OpLoad %type_2d_image %TheStruct_Texture
+// CHECK: [[smp:%[0-9]+]] = OpLoad %type_sampler %TheStruct_Sampler
 // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]]
     + TheStruct.Texture.Sample(TheStruct.Sampler, float2(0, 0));
 }

From 94596e1c97e10ef2f97cf21d33cbabdc0e7df2e8 Mon Sep 17 00:00:00 2001
From: Steven Perron
Date: Mon, 24 Mar 2025 15:52:46 -0400
Subject: [PATCH 44/88] [SPIRV] Allow sampled type to be half for universal
 (#7252)

We have a check that the sampled type for an image cannot be a 16-bit
float. This is true for Vulkan, but not true for general SPIR-V. We
modify this check to apply only when the target env is Vulkan. We also
move the check to SpirvEmitter, where the error handling is better. In
its old location, the compiler would continue to run with an unexpected
nullptr.

Fixes #6987
Fixes #6989

---------

Co-authored-by: Cassandra Beckley
---
 .../include/clang/SPIRV/FeatureManager.h | 3 +++
 tools/clang/lib/SPIRV/FeatureManager.cpp | 18 ++++++++++++++++++
 tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 20 --------------------
 tools/clang/lib/SPIRV/SpirvEmitter.cpp | 13 +++++++++++++
 .../test/CodeGenSPIRV/type.buffer.half.hlsl | 12 ++++++++++--
 .../test/CodeGenSPIRV/type.buffer.half4.hlsl | 14 ++++++++++++++
 6 files changed, 58 insertions(+), 22 deletions(-)
 create mode 100644 tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl

diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h
index 32ee187091..841708d8d5 100644
--- a/tools/clang/include/clang/SPIRV/FeatureManager.h
+++ b/tools/clang/include/clang/SPIRV/FeatureManager.h
@@ -132,6 +132,9 @@ class FeatureManager {
   /// Returns false otherwise.
   bool isTargetEnvVulkan1p3OrAbove();

+  /// Return true if the target environment is a Vulkan environment.
+  bool isTargetEnvVulkan();
+
   /// Returns the spv_target_env matching the input string if possible.
   /// This functions matches the spv_target_env with the command-line version
   /// of the name ('vulkan1.1', not 'Vulkan 1.1').
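As a rough sketch of the user-visible effect of this change (the shader below is illustrative and is not one of the tests in this patch; the resource and entry-point names are invented, and the flags are taken from the RUN lines of the tests added above), a 16-bit sampled type is now rejected only when compiling for a Vulkan target environment and is accepted for universal SPIR-V:

// Illustrative only; compile with, for example:
//   dxc -T ps_6_6 -E main -spirv -enable-16bit-types shader.hlsl
//     -> error for Vulkan targets (sampled type smaller than 32 bits)
//   dxc -T ps_6_6 -E main -spirv -fspv-target-env=universal1.5 -enable-16bit-types shader.hlsl
//     -> accepted; the image is lowered with a 16-bit sampled type (OpTypeImage %half ...)
Texture2D<half4> Tex : register(t0);
SamplerState Samp : register(s0);

float4 main(float2 uv : TEXCOORD0) : SV_Target {
  return Tex.Sample(Samp, uv);
}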
diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index 2512984a4c..c459f7af0f 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -405,5 +405,23 @@ bool FeatureManager::isTargetEnvVulkan1p3OrAbove() { return targetEnv >= SPV_ENV_VULKAN_1_3; } +bool FeatureManager::isTargetEnvVulkan() { + // This assert ensure that this list will be updated, if necessary, when + // a new target environment is added. + static_assert(SPV_ENV_VULKAN_1_4 + 1 == SPV_ENV_MAX); + + switch (targetEnv) { + case SPV_ENV_VULKAN_1_0: + case SPV_ENV_VULKAN_1_1: + case SPV_ENV_VULKAN_1_2: + case SPV_ENV_VULKAN_1_1_SPIRV_1_4: + case SPV_ENV_VULKAN_1_3: + case SPV_ENV_VULKAN_1_4: + return true; + default: + return false; + } +} + } // end namespace spirv } // end namespace clang diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 24cce9d89e..a5bc4a4aa8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -834,26 +834,6 @@ LowerTypeVisitor::lowerResourceType(QualType type, SpirvLayoutRule rule, // TODO: avoid string comparison once hlsl::IsHLSLResouceType() does that. - // Vulkan does not yet support true 16-bit float texture objexts. - if (name == "Buffer" || name == "RWBuffer" || name == "Texture1D" || - name == "Texture2D" || name == "Texture3D" || name == "TextureCube" || - name == "Texture1DArray" || name == "Texture2DArray" || - name == "Texture2DMS" || name == "Texture2DMSArray" || - name == "TextureCubeArray" || name == "RWTexture1D" || - name == "RWTexture2D" || name == "RWTexture3D" || - name == "RWTexture1DArray" || name == "RWTexture2DArray") { - const auto sampledType = hlsl::GetHLSLResourceResultType(type); - const auto loweredType = - lowerType(getElementType(astContext, sampledType), rule, - /*isRowMajor*/ llvm::None, srcLoc); - if (const auto *floatType = dyn_cast(loweredType)) { - if (floatType->getBitwidth() == 16) { - emitError("16-bit texture types not yet supported with -spirv", srcLoc); - return nullptr; - } - } - } - { // Texture types spv::Dim dim = {}; bool isArray = {}; diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 557768f59a..e1124999ec 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -1880,6 +1880,19 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { } } + if (featureManager.isTargetEnvVulkan() && + (isTexture(decl->getType()) || isRWTexture(decl->getType()) || + isBuffer(decl->getType()) || isRWBuffer(decl->getType()))) { + const auto sampledType = hlsl::GetHLSLResourceResultType(decl->getType()); + if (isFloatOrVecMatOfFloatType(sampledType) && + isOrContains16BitType(sampledType, spirvOptions.enable16BitTypes)) { + emitError("The sampled type for textures cannot be a floating point type " + "smaller than 32-bits when targeting a Vulkan environment.", + loc); + return; + } + } + if (decl->hasAttr()) { // This is a VarDecl for specialization constant. 
createSpecConstant(decl); diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl index e5954abae5..99d365b5e2 100644 --- a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl @@ -1,6 +1,14 @@ -// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL -// CHECK: error: 16-bit texture types not yet supported with -spirv +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. +// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant Buffer MyBuffer; void main(): SV_Target { } diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl new file mode 100644 index 0000000000..f29af69c1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl @@ -0,0 +1,14 @@ +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL + +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. +// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +Buffer MyBuffer; + +void main(): SV_Target { } From 9a06f4d27acdce04b0fcd1c9ffef46eb43b667b8 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Mon, 24 Mar 2025 15:54:50 -0700 Subject: [PATCH 45/88] Consolidate buffer store translation (#7251) Added structured and typed buffer support to TranslateStore and used it for all such lowerings. Includes IR and fcgl tests for the same in addition to recently added load/store tests that exercise this same code.
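As an orientation for the diff below, a small HLSL sketch (not part of the patch; resource names and bindings are illustrative) of two store forms that now go through the single TranslateStore path: raw byte-address stores pass a byte index with no offset, while structured-buffer stores pass an element index plus a byte offset, matching the Coord0/Coord1 handling added below.

  RWByteAddressBuffer Bab : register(u0);
  RWStructuredBuffer<float2x2> Mats : register(u1);

  void main(uint ix : IX0) {
    // Raw buffer store: lowered to RawBufferStore addressed by a byte index.
    Bab.Store<float4>(16 * ix, float4(1, 2, 3, 4));
    // Structured-buffer store: lowered to RawBufferStore addressed by an
    // element index plus an offset within the element.
    Mats[ix] = float2x2(1, 2, 3, 4);
  }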
--- lib/HLSL/HLOperationLower.cpp | 175 +-- .../hlsl/intrinsics/buffer-store.hlsl | 192 +++ .../hlsl/intrinsics/buffer-store.ll | 822 +++++++++++++ .../hlsl/intrinsics/buffer-typed-store.hlsl | 404 ++++++ .../hlsl/intrinsics/buffer-typed-store.ll | 1079 +++++++++++++++++ 5 files changed, 2560 insertions(+), 112 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 96ebda43ac..5a0dadf7f4 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4335,18 +4335,15 @@ void Split64bitValForStore(Type *EltTy, ArrayRef vals, unsigned size, } void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, - Value *offset, IRBuilder<> &Builder, hlsl::OP *OP, - Value *sampIdx = nullptr) { + Value *Idx, Value *offset, IRBuilder<> &Builder, + hlsl::OP *OP, Value *sampIdx = nullptr) { Type *Ty = val->getType(); - - // This function is no longer used for lowering stores to a - // structured buffer. - DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer); - OP::OpCode opcode = OP::OpCode::NumOpCodes; + bool IsTyped = true; switch (RK) { case DxilResource::Kind::RawBuffer: case DxilResource::Kind::StructuredBuffer: + IsTyped = false; opcode = OP::OpCode::RawBufferStore; break; case DxilResource::Kind::TypedBuffer: @@ -4364,10 +4361,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, break; } - bool isTyped = opcode == OP::OpCode::TextureStore || - opcode == OP::OpCode::TextureStoreSample || - RK == DxilResource::Kind::TypedBuffer; - Type *i32Ty = Builder.getInt32Ty(); Type *i64Ty = Builder.getInt64Ty(); Type *doubleTy = Builder.getDoubleTy(); @@ -4390,7 +4383,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, alignValue = 4; Constant *Alignment = OP->GetI32Const(alignValue); bool is64 = EltTy == i64Ty || EltTy == doubleTy; - if (is64 && isTyped) { + if (is64 && IsTyped) { EltTy = i32Ty; } @@ -4406,38 +4399,42 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, storeArgs.emplace_back(opArg); // opcode storeArgs.emplace_back(handle); // resource handle - unsigned offset0Idx = 0; - if (RK == DxilResource::Kind::RawBuffer || - RK == DxilResource::Kind::TypedBuffer) { - // Offset 0 - if (offset->getType()->isVectorTy()) { - Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0); - storeArgs.emplace_back(scalarOffset); // offset + unsigned OffsetIdx = 0; + if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::BufferStore) { + // Append Coord0 (Index) value. + if (Idx->getType()->isVectorTy()) { + Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0); + storeArgs.emplace_back(ScalarIdx); // Coord0 (Index). } else { - storeArgs.emplace_back(offset); // offset + storeArgs.emplace_back(Idx); // Coord0 (Index). } - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + // Store OffsetIdx representing the argument that may need to be incremented + // later to load additional chunks of data. + // Only structured buffers can use the offset parameter. + // Others must increment the index. 
+ if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = storeArgs.size(); + else + OffsetIdx = storeArgs.size() - 1; - // Offset 1 - storeArgs.emplace_back(undefI); + // Coord1 (Offset). + // Only relevant when storing more than 4 elements to structured buffers. + storeArgs.emplace_back(offset); } else { // texture store unsigned coordSize = DxilResource::GetNumCoords(RK); // Set x first. - if (offset->getType()->isVectorTy()) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0)); + if (Idx->getType()->isVectorTy()) + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0)); else - storeArgs.emplace_back(offset); - - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + storeArgs.emplace_back(Idx); for (unsigned i = 1; i < 3; i++) { if (i < coordSize) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, i)); + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i)); else storeArgs.emplace_back(undefI); } @@ -4464,30 +4461,24 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } for (unsigned j = 0; j < storeArgsList.size(); j++) { - - // For second and subsequent store calls, increment the offset0 (i.e. store - // index) + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. if (j > 0) { - // Greater than four-components store is not allowed for - // TypedBuffer and Textures. So greater than four elements - // scenario should only get hit here for RawBuffer. - DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer); unsigned EltSize = OP->GetAllocSizeForType(EltTy); - unsigned newOffset = EltSize * MaxStoreElemCount * j; - Value *newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset); - newOffsetVal = - Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal); - storeArgsList[j][offset0Idx] = newOffsetVal; + unsigned NewCoord = EltSize * MaxStoreElemCount * j; + Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord); + NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal); + storeArgsList[j][OffsetIdx] = NewCoordVal; } - // values + // Set value parameters. 
uint8_t mask = 0; if (Ty->isVectorTy()) { unsigned vecSize = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount); Value *emptyVal = undefVal; - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; emptyVal = Builder.CreateExtractElement(val, (uint64_t)0); } @@ -4503,7 +4494,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } else { - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; storeArgsList[j].emplace_back(val); storeArgsList[j].emplace_back(val); @@ -4518,7 +4509,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } - if (is64 && isTyped) { + if (is64 && IsTyped) { unsigned size = 1; if (Ty->isVectorTy()) { size = @@ -4576,7 +4567,8 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); - TranslateStore(RK, handle, val, offset, Builder, hlslOP); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); + TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); return nullptr; } @@ -7907,40 +7899,11 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, hlsl::OP *OP, Value *bufIdx, Value *baseOffset, Value *val, const DataLayout &DL) { - HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - - val = MatTy.emitLoweredRegToMem(val, Builder); - - unsigned EltSize = DL.getTypeAllocSize(EltTy); - Constant *Alignment = OP->GetI32Const(EltSize); - Value *offset = baseOffset; - if (baseOffset == nullptr) - offset = OP->GetU32Const(0); - - unsigned matSize = MatTy.getNumElements(); - Value *undefElt = UndefValue::get(EltTy); - - unsigned storeSize = matSize; - if (matSize % 4) { - storeSize = matSize + 4 - (matSize & 3); - } - std::vector elts(storeSize, undefElt); - for (unsigned i = 0; i < matSize; i++) - elts[i] = Builder.CreateExtractElement(val, i); - - for (unsigned i = 0; i < matSize; i += 4) { - uint8_t mask = 0; - for (unsigned j = 0; j < 4 && (i + j) < matSize; j++) { - if (elts[i + j] != undefElt) - mask |= (1 << j); - } - GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder, - {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask, - Alignment); - // Update offset by 4*4bytes. - offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize)); - } + [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType); + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(), + "helper type should match vectorized matrix"); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); } void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, @@ -8085,6 +8048,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, GEP->eraseFromParent(); } else if (StoreInst *stUser = dyn_cast(subsUser)) { + // Store elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. 
IRBuilder<> stBuilder(stUser); Value *Val = stUser->getValueOperand(); if (Val->getType()->isVectorTy()) { @@ -8108,6 +8074,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, LoadInst *ldUser = cast(subsUser); IRBuilder<> ldBuilder(ldUser); Value *ldData = UndefValue::get(resultType); + // Load elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. if (resultType->isVectorTy()) { for (unsigned i = 0; i < resultSize; i++) { Value *ResultElt; @@ -8248,30 +8217,9 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, LdInst->eraseFromParent(); } else if (StoreInst *StInst = dyn_cast(user)) { // Store of scalar/vector within a struct or structured raw store. - Type *Ty = StInst->getValueOperand()->getType(); - Type *pOverloadTy = Ty->getScalarType(); - Value *offset = baseOffset; - Value *val = StInst->getValueOperand(); - Value *undefVal = llvm::UndefValue::get(pOverloadTy); - Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; - uint8_t mask = 0; - if (Ty->isVectorTy()) { - unsigned vectorNumElements = Ty->getVectorNumElements(); - DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); - assert(vectorNumElements <= 4); - for (unsigned i = 0; i < vectorNumElements; i++) { - vals[i] = Builder.CreateExtractElement(val, i); - mask |= (1 << i); - } - } else { - vals[0] = val; - mask = DXIL::kCompMask_X; - } - Constant *alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder, vals, - mask, alignment); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); StInst->eraseFromParent(); } else if (BitCastInst *BCI = dyn_cast(user)) { // Recurse users @@ -8418,14 +8366,15 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, User *user = *(It++); Instruction *I = cast(user); IRBuilder<> Builder(I); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); if (LoadInst *ldInst = dyn_cast(user)) { TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout); } else if (StoreInst *stInst = dyn_cast(user)) { Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP); // delete the st stInst->eraseFromParent(); } else if (GetElementPtrInst *GEP = dyn_cast(user)) { @@ -8450,9 +8399,10 @@ void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate St. // Reset insert point, UpdateVectorElt may move SI to different block. 
StBuilder.SetInsertPoint(SI); - TranslateStore(RK, handle, ldVal, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - StBuilder, hlslOP); + TranslateStore( + RK, handle, ldVal, + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI, + StBuilder, hlslOP); SI->eraseFromParent(); continue; } @@ -8642,9 +8592,10 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, } else { StoreInst *stInst = cast(*U); Value *val = stInst->getValueOperand(); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP, mipLevel); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP, mipLevel); stInst->eraseFromParent(); } Translated = true; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl new file mode 100644 index 0000000000..fa070ceca5 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl @@ -0,0 +1,192 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer store lowering + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); +RWStructuredBuffer< float[2] > ArrBuf : register(u3); +RWStructuredBuffer< Vector > SVecBuf : register(u4); +RWStructuredBuffer< float2x2 > MatBuf : register(u5); +RWStructuredBuffer< Matrix > SMatBuf : register(u6); + +ConsumeStructuredBuffer< float2 > CVecBuf : register(u7); +ConsumeStructuredBuffer< float[2] > CArrBuf : register(u8); +ConsumeStructuredBuffer< Vector > CSVecBuf : register(u9); +ConsumeStructuredBuffer< float2x2 > CMatBuf : register(u10); +ConsumeStructuredBuffer< Matrix > CSMatBuf : register(u11); + +AppendStructuredBuffer< float2 > AVecBuf : register(u12); +AppendStructuredBuffer< float[2] > AArrBuf : register(u13); +AppendStructuredBuffer< Vector > ASVecBuf : register(u14); +AppendStructuredBuffer< float2x2 > AMatBuf : register(u15); +AppendStructuredBuffer< Matrix > ASMatBuf : register(u16); + +void main(uint ix0 : IX0) { + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, 
%dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], <2 x i1> + BabBuf.Store(ix0 + 1, BabBuf.Load< bool2 >(ix0 + 0)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, [2 x float]*)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], [2 x float] + BabBuf.Store(ix0 + 2, BabBuf.Load< float[2] >(ix0 + 1)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Vector\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Vector" + BabBuf.Store >(ix0 + 3, BabBuf.Load< Vector >(ix0 + 2)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 
4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %class.matrix.float.2.2)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], %class.matrix.float.2.2 + BabBuf.Store(ix0 + 4, BabBuf.Load< float2x2 >(ix0 + 3)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Matrix\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Matrix" + BabBuf.Store >(ix0 + 5, BabBuf.Load< Matrix >(ix0 + 4)); + + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]] + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, 
%"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + VecBuf[ix0 + 1] = VecBuf[ix0 + 0]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + ArrBuf[ix0 + 2] = ArrBuf[ix0 + 1]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SVecBuf[ix0 + 3] = SVecBuf[ix0 + 2]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call 
%class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + MatBuf[ix0 + 4] = MatBuf[ix0 + 3]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SMatBuf[ix0 + 5] = SMatBuf[ix0 + 4]; + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call <2 x float> @"dx.hl.op..consume<2 x float> (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, <2 x float>)"(i32 226, %dx.types.Handle 
[[anhdl]], <2 x float> [[cn]]) + AVecBuf.Append(CVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" undef) + // CHECK: [[cn:%.*]] = call [2 x float]* @"dx.hl.op..consume[2 x float]* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, [2 x float]*)"(i32 226, %dx.types.Handle [[anhdl]], [2 x float]* + AArrBuf.Append(CArrBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Vector"* @"dx.hl.op..consume%\22struct.Vector\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Vector\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Vector"* + ASVecBuf.Append(CSVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %class.matrix.float.2.2 @"dx.hl.op..consume%class.matrix.float.2.2 (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %class.matrix.float.2.2)"(i32 226, %dx.types.Handle [[anhdl]], %class.matrix.float.2.2 [[cn]]) + AMatBuf.Append(CMatBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Matrix"* @"dx.hl.op..consume%\22struct.Matrix\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Matrix\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Matrix"* + ASMatBuf.Append(CSMatBuf.Consume()); + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll new file mode 100644 index 0000000000..540ab85819 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll @@ -0,0 +1,822 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.RWStructuredBuffer" = type { [2 x float] } +%"class.RWStructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.RWStructuredBuffer >" = type { %class.matrix.float.2.2 } +%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.RWStructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { <2 x float> } +%"class.ConsumeStructuredBuffer" = type { [2 x float] } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Vector" } +%"class.ConsumeStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Matrix" } +%"class.AppendStructuredBuffer >" = type { <2 x float> } +%"class.AppendStructuredBuffer" = type { [2 x float] } +%"class.AppendStructuredBuffer >" = type { %"struct.Vector" } +%"class.AppendStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.AppendStructuredBuffer >" = type { %"struct.Matrix" } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external global 
%struct.RWByteAddressBuffer, align 4 +@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" = external global %"class.ConsumeStructuredBuffer", align 4 +@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 8 +@"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" = external global %"class.AppendStructuredBuffer", align 4 +@"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 8 +@"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 + +; CHECK-LABEL: define void @main(i32 %ix0) +; Function Attrs: nounwind +define void @main(i32 %ix0) #0 { +bb: + ; CHECK: [[pix:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + + %tmp = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:32 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp) ; line:60 col:32 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:32 + %tmp3 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, 
i32)"(i32 231, %dx.types.Handle %tmp2, i32 %ix0) ; line:60 col:32 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[vec2:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 undef, i32 undef, i8 3, i32 4) + %tmp4 = add i32 %ix0, 1 ; line:60 col:27 + %tmp5 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:3 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp5) ; line:60 col:3 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle %tmp7, i32 %tmp4, <2 x i1> %tmp3) ; line:60 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 4, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp8 = add i32 %ix0, 1 ; line:70 col:63 + %tmp9 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:35 + %tmp10 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp9) ; line:70 col:35 + %tmp11 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp10, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:35 + %tmp12 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp11, i32 %tmp8) ; line:70 col:35 + %tmp13 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 0 ; line:70 col:3 + %tmp14 = load float, float* %tmp13 ; line:70 col:3 + %tmp15 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 1 ; line:70 col:3 + %tmp16 = load float, float* %tmp15 ; line:70 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 
[[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 4 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp17 = add i32 %ix0, 2 ; line:70 col:30 + %tmp18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:3 + %tmp19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp18) ; line:70 col:3 + %tmp20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp17, float %tmp14) ; line:70 col:3 + %tmp21 = add i32 %tmp17, 4 ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp21, float %tmp16) ; line:70 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 24, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> 
[[ping]], float [[val1]], i64 1 + %tmp22 = add i32 %ix0, 2 ; line:80 col:78 + %tmp23 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:43 + %tmp24 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp23) ; line:80 col:43 + %tmp25 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp24, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:43 + %tmp26 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp25, i32 %tmp22) ; line:80 col:43 + %tmp27 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 0 ; line:80 col:3 + %tmp28 = load <4 x float>, <4 x float>* %tmp27 ; line:80 col:3 + %tmp29 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 1 ; line:80 col:3 + %tmp30 = load double, double* %tmp29 ; line:80 col:3 + %tmp31 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 2 ; line:80 col:3 + %tmp32 = load <2 x float>, <2 x float>* %tmp31 ; line:80 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, double [[dval]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 24 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp33 = add i32 %ix0, 3 ; line:80 col:38 + %tmp34 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:3 + %tmp35 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp34) ; line:80 col:3 + %tmp36 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp35, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp33, <4 x float> %tmp28) ; line:80 col:3 + %tmp37 = add i32 %tmp33, 16 ; line:80 col:3 + call void @"dx.hl.op..void (i32, 
%dx.types.Handle, i32, double)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp37, double %tmp30) ; line:80 col:3 + %tmp38 = add i32 %tmp33, 24 ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp38, <2 x float> %tmp32) ; line:80 col:3 + + + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[rvec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp39 = add i32 %ix0, 3 ; line:90 col:63 + %tmp40 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:35 + %tmp41 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp40) ; line:90 col:35 + %tmp42 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp41, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:35 + %tmp43 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp42, i32 %tmp39) ; line:90 col:35 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp44 = add i32 %ix0, 4 ; line:90 col:30 + %row2col = shufflevector <4 x float> %tmp43, <4 x float> %tmp43, <4 x i32> ; line:90 col:3 + %tmp45 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:3 + %tmp46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp45) ; line:90 col:3 + %tmp47 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp46, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp47, i32 %tmp44, <4 x float> %row2col) ; line:90 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp48 = add i32 %ix0, 4 ; line:100 col:82 + %tmp49 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:45 + %tmp50 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp49) ; line:100 col:45 + %tmp51 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp50, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:45 + %tmp52 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp51, i32 %tmp48) ; line:100 col:45 + %tmp53 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 0 ; line:100 col:3 + %tmp54 = load <4 x float>, <4 x float>* %tmp53 ; line:100 col:3 + %tmp55 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 1 ; line:100 col:3 + %tmp56 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> 
(i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp55) ; line:100 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp57 = add i32 %ix0, 5 ; line:100 col:40 + %tmp58 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:3 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp58) ; line:100 col:3 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp57, <4 x float> %tmp54) ; line:100 col:3 + %tmp61 = add i32 %tmp57, 16 ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp61, <4 x float> %tmp56) ; line:100 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp62 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:21 + %tmp63 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp62) ; line:111 col:21 + %tmp64 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp63, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:21 + %tmp65 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp64, i32 %ix0) ; line:111 col:21 + %tmp66 = load <2 x float>, <2 x float>* %tmp65 ; line:111 col:21 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp67 = add i32 %ix0, 1 ; line:111 col:14 + %tmp68 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:3 + %tmp69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp68) ; line:111 col:3 + %tmp70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp69, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:3 + %tmp71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp70, i32 %tmp67) ; line:111 col:3 + store <2 x float> %tmp66, <2 x float>* %tmp71 ; line:111 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 4, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 4, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + %tmp72 = add i32 %ix0, 2 ; line:121 col:14 + %tmp73 = load %"class.RWStructuredBuffer", 
%"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:3 + %tmp74 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp73) ; line:121 col:3 + %tmp75 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp74, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:3 + %tmp76 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp75, i32 %tmp72) ; line:121 col:3 + %tmp77 = add i32 %ix0, 1 ; line:121 col:32 + %tmp78 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:21 + %tmp79 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp78) ; line:121 col:21 + %tmp80 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp79, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:21 + %tmp81 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp80, i32 %tmp77) ; line:121 col:21 + %tmp82 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 0 ; line:121 col:21 + %tmp83 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 0 ; line:121 col:21 + %tmp84 = load float, float* %tmp83 ; line:121 col:21 + store float %tmp84, float* %tmp82 ; line:121 col:21 + %tmp85 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 1 ; line:121 col:21 + %tmp86 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 1 ; line:121 col:21 + %tmp87 = load float, float* %tmp86 ; line:121 col:21 + store float %tmp87, float* %tmp85 ; line:121 col:21 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: 
[[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, double [[val0]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp88 = add i32 %ix0, 3 ; line:131 col:15 + %tmp89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:3 + %tmp90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp89) ; line:131 col:3 + %tmp91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp90, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:3 + %tmp92 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp91, i32 %tmp88) ; line:131 col:3 + %tmp93 = add i32 %ix0, 2 ; line:131 col:34 + %tmp94 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:22 + %tmp95 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp94) ; line:131 col:22 + %tmp96 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp95, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:22 + %tmp97 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp96, i32 %tmp93) ; line:131 col:22 + %tmp98 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 0 ; line:131 col:22 + %tmp99 = getelementptr inbounds 
%"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 0 ; line:131 col:22 + %tmp100 = load <4 x float>, <4 x float>* %tmp99 ; line:131 col:22 + store <4 x float> %tmp100, <4 x float>* %tmp98 ; line:131 col:22 + %tmp101 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 1 ; line:131 col:22 + %tmp102 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 1 ; line:131 col:22 + %tmp103 = load double, double* %tmp102 ; line:131 col:22 + store double %tmp103, double* %tmp101 ; line:131 col:22 + %tmp104 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 2 ; line:131 col:22 + %tmp105 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 2 ; line:131 col:22 + %tmp106 = load <2 x float>, <2 x float>* %tmp105 ; line:131 col:22 + store <2 x float> %tmp106, <2 x float>* %tmp104 ; line:131 col:22 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp107 = add i32 %ix0, 4 ; line:141 col:14 + %tmp108 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:3 + %tmp109 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp108) ; line:141 col:3 + %tmp110 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp109, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:3 + %tmp111 = call %class.matrix.float.2.2* 
@"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp110, i32 %tmp107) ; line:141 col:3 + %tmp112 = add i32 %ix0, 3 ; line:141 col:32 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:21 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:141 col:21 + %tmp115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:21 + %tmp116 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 %tmp112) ; line:141 col:21 + %tmp117 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp116) ; line:141 col:21 + %tmp118 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp111, <4 x float> %tmp117) ; line:141 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + 
; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp119 = add i32 %ix0, 5 ; line:151 col:15 + %tmp120 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:3 + %tmp121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp120) ; line:151 col:3 + %tmp122 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp121, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:3 + %tmp123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp122, i32 %tmp119) ; line:151 col:3 + %tmp124 = add i32 %ix0, 4 ; line:151 col:34 + %tmp125 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:22 + %tmp126 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp125) ; line:151 col:22 + %tmp127 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp126, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:22 + %tmp128 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp127, i32 %tmp124) ; line:151 col:22 + %tmp129 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 0 ; line:151 col:22 + %tmp130 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 0 ; line:151 col:22 + %tmp131 = load <4 x float>, <4 x float>* %tmp130 ; line:151 col:22 + store <4 x float> %tmp131, <4 x float>* %tmp129 ; line:151 col:22 + %tmp132 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 1 ; line:151 col:22 + %tmp133 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 1 ; line:151 col:22 + %tmp134 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp133) ; line:151 col:22 + %tmp135 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp132, <4 
x float> %tmp134) ; line:151 col:22 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp136 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:18 + %tmp137 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp136) ; line:159 col:18 + %tmp138 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp137, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:159 col:18 + %tmp139 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp138) #0 ; line:159 col:18 + %tmp140 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp138, i32 %tmp139) #0 ; line:159 col:18 + %tmp141 = load <2 x float>, <2 x float>* %tmp140 ; line:159 col:18 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp142 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp142) ; line:159 col:3 + %tmp144 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp143, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:159 col:3 + %tmp145 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp144) #0 ; line:159 col:3 + %tmp146 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, 
%dx.types.Handle %tmp144, i32 %tmp145) #0 ; line:159 col:3 + store <2 x float> %tmp141, <2 x float>* %tmp146 ; line:159 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer"(i32 160, %"class.ConsumeStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp147 = load %"class.ConsumeStructuredBuffer", %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" ; line:167 col:18 + %tmp148 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" %tmp147) ; line:167 col:18 + %tmp149 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp148, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" zeroinitializer) ; line:167 col:18 + %tmp150 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp149) #0 ; line:167 col:18 + %tmp151 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp149, i32 %tmp150) #0 ; line:167 col:18 + %tmp152 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 0 ; line:167 col:3 + %tmp153 = load float, float* %tmp152 ; line:167 col:3 + %tmp154 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 1 ; line:167 col:3 + %tmp155 = load float, float* %tmp154 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer"(i32 160, %"class.AppendStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp156 = load %"class.AppendStructuredBuffer", %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" ; line:167 col:3 + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" %tmp156) ; line:167 col:3 + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4108, i32 
8 }, %"class.AppendStructuredBuffer" zeroinitializer) ; line:167 col:3 + %tmp159 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp158) #0 ; line:167 col:3 + %tmp160 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp158, i32 %tmp159) #0 ; line:167 col:3 + %tmp161 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 0 ; line:167 col:3 + store float %tmp153, float* %tmp161 ; line:167 col:3 + %tmp162 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 1 ; line:167 col:3 + store float %tmp155, float* %tmp162 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 1, i32 8) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + + %tmp163 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:19 + %tmp164 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp163) ; line:175 col:19 + %tmp165 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp164, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:175 col:19 + %tmp166 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp165) #0 ; line:175 col:19 + %tmp167 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp165, i32 %tmp166) #0 ; line:175 col:19 + %tmp168 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* 
%tmp167, i32 0, i32 0 ; line:175 col:3 + %tmp169 = load <4 x float>, <4 x float>* %tmp168 ; line:175 col:3 + %tmp170 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 1 ; line:175 col:3 + %tmp171 = load double, double* %tmp170 ; line:175 col:3 + %tmp172 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 2 ; line:175 col:3 + %tmp173 = load <2 x float>, <2 x float>* %tmp172 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, double [[dval]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp174 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:3 + %tmp175 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp174) ; line:175 col:3 + %tmp176 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp175, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:175 col:3 + %tmp177 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp176) #0 ; line:175 col:3 + %tmp178 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp176, i32 %tmp177) #0 ; line:175 col:3 + %tmp179 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 0 ; line:175 col:3 + store <4 x float> %tmp169, <4 x float>* %tmp179 ; line:175 col:3 + %tmp180 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 1 ; line:175 col:3 + store double %tmp171, double* %tmp180 ; line:175 col:3 + %tmp181 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 2 ; line:175 col:3 + store <2 x float> %tmp173, <2 x float>* %tmp181 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], 
%dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[rvec4:%.*]] = shufflevector <4 x float> [[vec4]], <4 x float> [[vec4]], <4 x i32> + %tmp182 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:18 + %tmp183 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp182) ; line:183 col:18 + %tmp184 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp183, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:183 col:18 + %tmp185 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp184) #0 ; line:183 col:18 + %tmp186 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp184, i32 %tmp185) #0 ; line:183 col:18 + %tmp187 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp186) ; line:183 col:18 + %col2row10 = shufflevector <4 x float> %tmp187, <4 x float> %tmp187, <4 x i32> ; line:183 col:18 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp188 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:3 + %tmp189 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp188) ; 
line:183 col:3 + %tmp190 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp189, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:183 col:3 + %tmp191 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp190) #0 ; line:183 col:3 + %tmp192 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp190, i32 %tmp191) #0 ; line:183 col:3 + %row2col11 = shufflevector <4 x float> %col2row10, <4 x float> %col2row10, <4 x i32> ; line:183 col:3 + call void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp192, <4 x float> %row2col11) ; line:183 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp193 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:19 + %tmp194 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp193) ; line:191 col:19 + %tmp195 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp194, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:191 col:19 + %tmp196 = call 
i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp195) #0 ; line:191 col:19 + %tmp197 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp195, i32 %tmp196) #0 ; line:191 col:19 + %tmp198 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp197, i32 0, i32 0 ; line:191 col:3 + %tmp199 = load <4 x float>, <4 x float>* %tmp198 ; line:191 col:3 + %tmp200 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp197, i32 0, i32 1 ; line:191 col:3 + %tmp201 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp200) ; line:191 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp202 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:3 + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp202) ; line:191 col:3 + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:191 col:3 + %tmp205 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp204) #0 ; line:191 col:3 + %tmp206 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp204, i32 %tmp205) #0 ; line:191 col:3 + %tmp207 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 0 ; line:191 col:3 + store <4 x float> %tmp199, <4 x float>* %tmp207 ; line:191 col:3 + %tmp208 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 1 ; line:191 col:3 + %tmp209 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp208, <4 x float> %tmp201) ; line:191 col:3 + + + ; CHECK: ret void + ret void ; line:193 col:1 +} + +declare void 
@"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32, %dx.types.Handle, i32, <2 x i1>) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32, %"class.RWStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer") #1 +declare %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare i32 @"dx.hl.op..i32 
(i32, %dx.types.Handle)"(i32, %dx.types.Handle) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32, %"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32, %dx.types.Handle, i32, <4 x float>) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, double)"(i32, %dx.types.Handle, i32, double) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32, %class.matrix.float.2.2*) #2 +declare <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 +declare void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} +!dx.entryPoints = !{!50} +!dx.fnprops = !{!72} +!dx.options = !{!73, !74} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.RWStructuredBuffer" undef, !12, %"class.RWStructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.RWStructuredBuffer >" undef, !29, %"class.RWStructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39, %"class.ConsumeStructuredBuffer >" undef, !7, %"class.ConsumeStructuredBuffer" undef, !12, %"class.ConsumeStructuredBuffer >" undef, !16, %"class.ConsumeStructuredBuffer >" undef, !29, %"class.ConsumeStructuredBuffer >" undef, !35, %"class.AppendStructuredBuffer >" undef, !7, %"class.AppendStructuredBuffer" undef, !12, %"class.AppendStructuredBuffer >" undef, !16, %"class.AppendStructuredBuffer >" undef, !29, %"class.AppendStructuredBuffer >" undef, !35} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 = !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} 
+!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{null, !52, null, null} +!52 = !{!53, !54, !56, !57, !59, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71} +!53 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!54 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!55 = !{i32 1, i32 8} +!56 = !{i32 2, %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!57 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!58 = !{i32 1, i32 32} +!59 = !{i32 4, %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!60 = !{i32 1, i32 16} +!61 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!62 = !{i32 6, %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A", !"CVecBuf", i32 0, i32 7, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!63 = !{i32 7, %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A", !"CArrBuf", i32 0, i32 8, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!64 = !{i32 8, %"class.ConsumeStructuredBuffer >"* @"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A", !"CSVecBuf", i32 0, i32 9, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!65 = !{i32 9, %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A", !"CMatBuf", i32 0, i32 10, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!66 = !{i32 10, %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"CSMatBuf", i32 0, i32 11, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!67 = !{i32 11, %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A", !"AVecBuf", i32 0, i32 12, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!68 = !{i32 12, %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A", !"AArrBuf", i32 0, i32 13, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!69 = !{i32 13, %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A", !"ASVecBuf", i32 0, i32 14, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!70 = !{i32 14, %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A", !"AMatBuf", i32 0, i32 15, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!71 = !{i32 15, %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"ASMatBuf", i32 0, 
i32 16, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!72 = !{void (i32)* @main, i32 1} +!73 = !{i32 64} +!74 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl new file mode 100644 index 0000000000..9ff6039127 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl @@ -0,0 +1,404 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer store lowering +// Focuses on converted types in addition to common float type. + +RWBuffer FTyBuf; +RWBuffer BTyBuf; +RWBuffer LTyBuf; +RWBuffer DTyBuf; + +RWTexture1D FTex1d; +RWTexture1D BTex1d; +RWTexture1D LTex1d; +RWTexture1D DTex1d; + +RWTexture2D FTex2d; +RWTexture2D BTex2d; +RWTexture2D LTex2d; +RWTexture2D DTex2d; + +RWTexture3D FTex3d; +RWTexture3D BTex3d; +RWTexture3D LTex3d; +RWTexture3D DTex3d; + +RWTexture2DMS FTex2dMs; +RWTexture2DMS BTex2dMs; +RWTexture2DMS LTex2dMs; +RWTexture2DMS DTex2dMs; + +// CHECK: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { + + // CHECK-DAG: [[ix3adr:%.*]] = alloca <3 x i32>, align 4 + // CHECK-DAG: [[ix2adr:%.*]] = alloca <2 x i32>, align 4 + // CHECK-DAG: [[ix1adr:%.*]] = alloca i32, align 4 + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTyBuf[ix1 + 1] = FTyBuf[ix1 + 0]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, 
i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTyBuf[ix1 + 3] = BTyBuf[ix1 + 2]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTyBuf[ix1 + 5] = LTyBuf[ix1 + 4]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: 
[[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTyBuf[ix1 + 7] = DTyBuf[ix1 + 6]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex1d[ix1 + 9] = FTex1d[ix1 + 8]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, 
i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex1d[ix1 + 11] = BTex1d[ix1 + 10]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex1d[ix1 + 13] = LTex1d[ix1 + 12]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex1d[ix1 + 15] = DTex1d[ix1 + 14]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, 
%"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2d[ix2 + 17] = FTex2d[ix2 + 16]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2d[ix2 + 19] = BTex2d[ix2 + 18]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2d[ix2 + 21] = LTex2d[ix2 + 20]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2d[ix2 + 23] = DTex2d[ix2 + 22]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // 
CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex3d[ix3 + 25] = FTex3d[ix3 + 24]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex3d[ix3 + 27] = BTex3d[ix3 + 26]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // 
CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex3d[ix3 + 29] = LTex3d[ix3 + 28]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex3d[ix3 + 31] = DTex3d[ix3 + 30]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 
0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs[ix2 + 33] = FTex2dMs[ix2 + 32]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2dMs[ix2 + 35] = BTex2dMs[ix2 + 34]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, 
%"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs[ix2 + 37] = LTex2dMs[ix2 + 36]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs[ix2 + 39] = DTex2dMs[ix2 + 38]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: 
[[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs.sample[ix1 + 1][ix2 + 41] = FTex2dMs.sample[ix1 + 0][ix2 + 40]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2dMs.sample[ix1 + 3][ix2 + 43] = BTex2dMs.sample[ix1 + 2][ix2 + 42]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 
0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs.sample[ix1 + 5][ix2 + 45] = LTex2dMs.sample[ix1 + 4][ix2 + 44]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs.sample[ix1 + 7][ix2 + 47] = DTex2dMs.sample[ix1 + 6][ix2 + 46]; + + // CHECK: ret void + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll new file mode 100644 index 0000000000..ac5c6182e1 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll @@ -0,0 +1,1079 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <3 x float> } +%"class.RWBuffer >" = type { <2 x i32> } +%"class.RWBuffer >" = type { <2 x i64> } +%"class.RWBuffer" = type { double } +%"class.RWTexture1D >" = type { <3 x float> } +%"class.RWTexture1D >" = type { <2 x i32> } +%"class.RWTexture1D >" = type { <2 x i64> } +%"class.RWTexture1D" = type { double } +%"class.RWTexture2D 
>" = type { <3 x float> } +%"class.RWTexture2D >" = type { <2 x i32> } +%"class.RWTexture2D >" = type { <2 x i64> } +%"class.RWTexture2D" = type { double } +%"class.RWTexture3D >" = type { <3 x float> } +%"class.RWTexture3D >" = type { <2 x i32> } +%"class.RWTexture3D >" = type { <2 x i64> } +%"class.RWTexture3D" = type { double } +%"class.RWTexture2DMS, 0>" = type { <3 x float>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i32>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i64>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS" = type { double, %"class.RWTexture2DMS::sample_type" } +%"class.RWTexture2DMS::sample_type" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" = external global %"class.RWBuffer >", align 8 +@"\01?DTyBuf@@3V?$RWBuffer@N@@A" = external global %"class.RWBuffer", align 8 +@"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture1D >", align 8 +@"\01?DTex1d@@3V?$RWTexture1D@N@@A" = external global %"class.RWTexture1D", align 8 +@"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" = external global %"class.RWTexture2D >", align 4 +@"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture2D >", align 4 +@"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture2D >", align 8 +@"\01?DTex2d@@3V?$RWTexture2D@N@@A" = external global %"class.RWTexture2D", align 8 +@"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture3D >", align 8 +@"\01?DTex3d@@3V?$RWTexture3D@N@@A" = external global %"class.RWTexture3D", align 8 +@"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 8 +@"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" = external global %"class.RWTexture2DMS", align 8 + +; Function Attrs: nounwind +; CHECK-LABEL: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) #0 { +bb: + ; CHECK: [[ix3_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> undef, i32 [[ix3_0]], i64 0 + ; CHECK: [[ix3_1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1, i32 undef) + ; CHECK: [[vec3:%.*]] = insertelement <3 x i32> [[ix3]], i32 [[ix3_1]], i64 1 + ; CHECK: [[ix3_2:%.*]] = call i32 
@dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 2, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> [[vec3]], i32 [[ix3_2]], i64 2 + ; CHECK: [[ix2_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef) + ; CHECK: [[vec2:%.*]] = insertelement <2 x i32> undef, i32 [[ix2_0]], i64 0 + ; CHECK: [[ix2_1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef) + ; CHECK: [[ix2:%.*]] = insertelement <2 x i32> [[vec2]], i32 [[ix2_1]], i64 1 + ; CHECK: [[ix1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp) + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp3 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 %ix1) + %tmp4 = load <3 x float>, <3 x float>* %tmp3 + %tmp5 = add i32 %ix1, 1 + %tmp6 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp7 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp6) + %tmp8 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp9 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, 
i32)"(i32 0, %dx.types.Handle %tmp8, i32 %tmp5) + store <3 x float> %tmp4, <3 x float>* %tmp9 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp10 = add i32 %ix1, 2 + %tmp11 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp11) + %tmp13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp12, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp14 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp13, i32 %tmp10) + %tmp15 = load <2 x i32>, <2 x i32>* %tmp14 + %tmp16 = icmp ne <2 x i32> %tmp15, zeroinitializer + %tmp17 = add i32 %ix1, 3 + %tmp18 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp18) + %tmp20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp21 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp20, i32 %tmp17) + %tmp22 = zext <2 x i1> %tmp16 to <2 x i32> + store <2 x i32> %tmp22, <2 x i32>* %tmp21 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp23 = add i32 %ix1, 4 + %tmp24 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp25 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp24) + %tmp26 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp25, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp27 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp26, i32 %tmp23) + %tmp28 = load <2 x i64>, <2 x i64>* %tmp27 + %tmp29 = add i32 %ix1, 5 + %tmp30 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp30) + %tmp32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp31, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp33 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp32, i32 %tmp29) + store <2 x i64> %tmp28, <2 x i64>* %tmp33 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp34 = add i32 %ix1, 6 + %tmp35 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp35) + %tmp37 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp36, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp38 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp37, i32 %tmp34) + %tmp39 = load double, double* %tmp38 + %tmp40 = add i32 %ix1, 7 + %tmp41 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp41) + %tmp43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp42, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp44 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp43, i32 %tmp40) + store double %tmp39, double* %tmp44 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = 
insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp45 = add i32 %ix1, 8 + %tmp46 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp46) + %tmp48 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp47, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp49 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp48, i32 %tmp45) + %tmp50 = load <3 x float>, <3 x float>* %tmp49 + %tmp51 = add i32 %ix1, 9 + %tmp52 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp52) + %tmp54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp55 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 %tmp51) + store <3 x float> %tmp50, <3 x float>* %tmp55 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp56 = add i32 %ix1, 10 + %tmp57 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp57) + %tmp59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp58, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp60 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp59, i32 %tmp56) + %tmp61 = load <2 x i32>, <2 x i32>* %tmp60 + %tmp62 = icmp ne <2 x i32> %tmp61, zeroinitializer + %tmp63 = add i32 %ix1, 11 + %tmp64 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp65 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp64) + %tmp66 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp65, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp67 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp66, i32 %tmp63) + %tmp68 = zext <2 x i1> %tmp62 to <2 x i32> + store <2 x i32> %tmp68, <2 x i32>* %tmp67 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + ; 
CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp69 = add i32 %ix1, 12 + %tmp70 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp70) + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp73 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 %tmp69) + %tmp74 = load <2 x i64>, <2 x i64>* %tmp73 + %tmp75 = add i32 %ix1, 13 + %tmp76 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp77 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp76) + %tmp78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp77, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp79 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp78, i32 %tmp75) + store <2 x i64> %tmp74, <2 x i64>* %tmp79 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 
}) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp80 = add i32 %ix1, 14 + %tmp81 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp82 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" %tmp81) + %tmp83 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp82, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp84 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp83, i32 %tmp80) + %tmp85 = load double, double* %tmp84 + %tmp86 = add i32 %ix1, 15 + %tmp87 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp88 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" %tmp87) + %tmp89 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp88, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp90 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp89, i32 %tmp86) + store double %tmp85, double* %tmp90 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x 
float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp91 = add <2 x i32> %ix2, + %tmp92 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp93 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp92) + %tmp94 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp93, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp95 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp94, <2 x i32> %tmp91) + %tmp96 = load <3 x float>, <3 x float>* %tmp95 + %tmp97 = add <2 x i32> %ix2, + %tmp98 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp99 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp98) + %tmp100 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp99, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp101 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp100, <2 x i32> %tmp97) + store <3 x float> %tmp96, <3 x float>* %tmp101 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = 
extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp102 = add <2 x i32> %ix2, + %tmp103 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp104 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp103) + %tmp105 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp104, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp106 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp105, <2 x i32> %tmp102) + %tmp107 = load <2 x i32>, <2 x i32>* %tmp106 + %tmp108 = icmp ne <2 x i32> %tmp107, zeroinitializer + %tmp109 = add <2 x i32> %ix2, + %tmp110 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp111 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp110) + %tmp112 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp111, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp113 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp112, <2 x i32> %tmp109) + %tmp114 = zext <2 x i1> %tmp108 to <2 x i32> + store <2 x i32> %tmp114, <2 x i32>* %tmp113 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp115 = add <2 x i32> %ix2, + %tmp116 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp117 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp116) + %tmp118 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp117, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp119 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp118, <2 x i32> %tmp115) + %tmp120 = load <2 x i64>, <2 x i64>* %tmp119 + %tmp121 = add <2 x i32> %ix2, + %tmp122 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp123 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp122) + %tmp124 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp123, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp125 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp124, <2 x i32> %tmp121) + store <2 x i64> %tmp120, <2 x i64>* %tmp125 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x 
i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp126 = add <2 x i32> %ix2, + %tmp127 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp128 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp127) + %tmp129 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp128, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp130 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp129, <2 x i32> %tmp126) + %tmp131 = load double, double* %tmp130 + %tmp132 = add <2 x i32> %ix2, + %tmp133 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp133) + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp134, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp136 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp135, <2 x i32> %tmp132) + store double %tmp131, double* %tmp136 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = 
add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp137 = add <3 x i32> %ix3, + %tmp138 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp139 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp138) + %tmp140 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp139, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp141 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp140, <3 x i32> %tmp137) + %tmp142 = load <3 x float>, <3 x float>* %tmp141 + %tmp143 = add <3 x i32> %ix3, + %tmp144 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp145 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp144) + %tmp146 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp145, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp147 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp146, <3 x i32> %tmp143) + store <3 x float> %tmp142, <3 x float>* %tmp147 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 
[[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp148 = add <3 x i32> %ix3, + %tmp149 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp150 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp149) + %tmp151 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp150, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp152 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp151, <3 x i32> %tmp148) + %tmp153 = load <2 x i32>, <2 x i32>* %tmp152 + %tmp154 = icmp ne <2 x i32> %tmp153, zeroinitializer + %tmp155 = add <3 x i32> %ix3, + %tmp156 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp156) + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp159 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp158, <3 x i32> %tmp155) + %tmp160 = zext <2 x i1> %tmp154 to <2 x i32> + store <2 x i32> %tmp160, <2 x i32>* %tmp159 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = 
extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp161 = add <3 x i32> %ix3, + %tmp162 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp163 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp162) + %tmp164 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp163, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp165 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp164, <3 x i32> %tmp161) + %tmp166 = load <2 x i64>, <2 x i64>* %tmp165 + %tmp167 = add <3 x i32> %ix3, + %tmp168 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp169 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp168) + %tmp170 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp169, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp171 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp170, <3 x i32> %tmp167) + store <2 x i64> %tmp166, <2 x i64>* %tmp171 + + ; CHECK: 
[[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp172 = add <3 x i32> %ix3, + %tmp173 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp174 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp173) + %tmp175 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp174, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp176 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp175, <3 x i32> %tmp172) + %tmp177 = load double, double* %tmp176 + %tmp178 = add <3 x i32> %ix3, + %tmp179 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp180 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp179) + %tmp181 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp180, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp182 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp181, <3 x i32> %tmp178) + store double %tmp177, double* %tmp182 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, 
%"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 0) + %tmp183 = add <2 x i32> %ix2, + %tmp184 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp185 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp184) + %tmp186 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp185, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp187 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp186, <2 x i32> %tmp183) + %tmp188 = load <3 x float>, <3 x float>* %tmp187 + %tmp189 = add <2 x i32> %ix2, + %tmp190 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp191 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp190) + %tmp192 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp191, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp193 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp192, <2 x i32> %tmp189) + store <3 x float> %tmp188, <3 x float>* %tmp193 + + ; 
CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15, i32 0) + %tmp194 = add <2 x i32> %ix2, + %tmp195 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp196 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp195) + %tmp197 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp196, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp198 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp197, <2 x i32> %tmp194) + %tmp199 = load <2 x i32>, <2 x i32>* %tmp198 + %tmp200 = icmp ne <2 x i32> %tmp199, zeroinitializer + %tmp201 = add <2 x i32> %ix2, + %tmp202 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp202) + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp205 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle 
%tmp204, <2 x i32> %tmp201) + %tmp206 = zext <2 x i1> %tmp200 to <2 x i32> + store <2 x i32> %tmp206, <2 x i32>* %tmp205 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15, i32 0) + + %tmp207 = add <2 x i32> %ix2, + %tmp208 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp209 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp208) + %tmp210 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp209, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp211 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle 
%tmp210, <2 x i32> %tmp207) + %tmp212 = load <2 x i64>, <2 x i64>* %tmp211 + %tmp213 = add <2 x i32> %ix2, + %tmp214 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp215 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp214) + %tmp216 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp215, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp217 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp216, <2 x i32> %tmp213) + store <2 x i64> %tmp212, <2 x i64>* %tmp217 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15, i32 0) + + %tmp218 = add <2 x i32> %ix2, + %tmp219 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp220 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp219) + %tmp221 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp220, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp222 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp221, <2 x i32> %tmp218) + %tmp223 = load double, double* %tmp222 + %tmp224 = add <2 x i32> %ix2, + %tmp225 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* 
@"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp226 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp225) + %tmp227 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp226, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp228 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp227, <2 x i32> %tmp224) + store double %tmp223, double* %tmp228 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle %388, i32 %389, i32 %390, i32 undef, float %392, float %393, float %394, float %391, i8 15, i32 %tmp235) + %tmp229 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp230 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp229) + %tmp231 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp230, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp232 = add <2 x i32> %ix2, + %tmp233 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp231, <2 x i32> %tmp232, i32 %ix1) + %tmp234 = 
load <3 x float>, <3 x float>* %tmp233 + %tmp235 = add i32 %ix1, 1 + %tmp236 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp237 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp236) + %tmp238 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp237, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp239 = add <2 x i32> %ix2, + %tmp240 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp238, <2 x i32> %tmp239, i32 %tmp235) + store <3 x float> %tmp234, <3 x float>* %tmp240 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: %tmp248 = icmp ne <2 x i32> %402, zeroinitializer + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: %407 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %408 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %409 = extractelement <2 x i32> %tmp255, i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %404, i32 %405, i32 %406, i32 undef, i32 %408, i32 %409, i32 %407, i32 %407, i8 15, i32 %tmp249) + ; CHECK: %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + %tmp241 = add i32 %ix1, 2 + %tmp242 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp243 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp242) + %tmp244 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp243, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp245 = add <2 x i32> %ix2, + %tmp246 = call <2 
x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp244, <2 x i32> %tmp245, i32 %tmp241) + %tmp247 = load <2 x i32>, <2 x i32>* %tmp246 + %tmp248 = icmp ne <2 x i32> %tmp247, zeroinitializer + %tmp249 = add i32 %ix1, 3 + %tmp250 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp251 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp250) + %tmp252 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp251, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp253 = add <2 x i32> %ix2, + %tmp254 = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp252, <2 x i32> %tmp253, i32 %tmp249) + %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + store <2 x i32> %tmp255, <2 x i32>* %tmp254 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] 
= trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15, i32 [[sax]]) + %tmp256 = add i32 %ix1, 4 + %tmp257 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp258 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp257) + %tmp259 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp258, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp260 = add <2 x i32> %ix2, + %tmp261 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp259, <2 x i32> %tmp260, i32 %tmp256) + %tmp262 = load <2 x i64>, <2 x i64>* %tmp261 + %tmp263 = add i32 %ix1, 5 + %tmp264 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp265 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp264) + %tmp266 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp265, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp267 = add <2 x i32> %ix2, + %tmp268 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp266, <2 x i32> %tmp267, i32 %tmp263) + store <2 x i64> %tmp262, <2 x i64>* %tmp268 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: %447 = call double @dx.op.makeDouble.f64(i32 101, i32 %445, i32 %446) + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; 
CHECK: %452 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %447) + ; CHECK: %453 = extractvalue %dx.types.splitdouble %452, 0 + ; CHECK: %454 = extractvalue %dx.types.splitdouble %452, 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %449, i32 %450, i32 %451, i32 undef, i32 %453, i32 %454, i32 %453, i32 %454, i8 15, i32 %tmp276) + %tmp269 = add i32 %ix1, 6 + %tmp270 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp271 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp270) + %tmp272 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp271, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp273 = add <2 x i32> %ix2, + %tmp274 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp272, <2 x i32> %tmp273, i32 %tmp269) + %tmp275 = load double, double* %tmp274 + %tmp276 = add i32 %ix1, 7 + %tmp277 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp278 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp277) + %tmp279 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp278, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp280 = add <2 x i32> %ix2, + %tmp281 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp279, <2 x i32> %tmp280, i32 %tmp276) + store double %tmp275, double* %tmp281 + + + ; CHECK: ret void + ret void +} + + +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 
+declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32, %"class.RWTexture1D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32, %"class.RWTexture2D") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32, %"class.RWTexture2DMS") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWTexture2DMS\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS") #1 +declare <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!19} +!dx.fnprops = !{!44} +!dx.options = !{!45, !46} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{void (i32, <2 x i32>, <3 x i32>)* @main, !"main", null, !20, null} +!20 = !{null, !21, null, null} +!21 = !{!22, !24, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43} +!22 = !{i32 0, %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A", !"FTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !23} +!23 = !{i32 0, i32 9} +!24 = !{i32 1, %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"BTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!25 = !{i32 0, i32 5} +!26 = !{i32 2, %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A", !"LTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!27 = !{i32 3, %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A", !"DTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!28 = !{i32 4, %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A", !"FTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !23} +!29 = !{i32 5, %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A", !"BTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!30 = !{i32 6, %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A", !"LTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!31 = !{i32 7, %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A", !"DTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!32 = !{i32 8, %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A", !"FTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !23} +!33 = !{i32 9, %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A", !"BTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!34 = !{i32 10, %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A", !"LTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!35 = !{i32 11, %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A", !"DTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 
false, !25}
+!36 = !{i32 12, %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A", !"FTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !23}
+!37 = !{i32 13, %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A", !"BTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25}
+!38 = !{i32 14, %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A", !"LTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25}
+!39 = !{i32 15, %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A", !"DTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25}
+!40 = !{i32 16, %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A", !"FTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !23}
+!41 = !{i32 17, %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A", !"BTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25}
+!42 = !{i32 18, %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A", !"LTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25}
+!43 = !{i32 19, %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A", !"DTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25}
+!44 = !{void (i32, <2 x i32>, <3 x i32>)* @main, i32 1}
+!45 = !{i32 64}
+!46 = !{i32 -1}

From c5f62d93c18ab5aa4ad6c5fa5288d3f445aa1f03 Mon Sep 17 00:00:00 2001
From: Simon Moll
Date: Tue, 25 Mar 2025 17:19:26 +0100
Subject: [PATCH 46/88] [SER] Patch 1: HitObject type lowering and SM 6.9 enablement (#7097)

Reduction of the complete SER implementation to just the HitObject type
and its default constructor. This has most of the infrastructure changes
in DXC to support SER, e.g., static member functions for builtins and the
HitObject scalar type.
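
As a quick illustration of the surface this patch enables, here is a minimal
HLSL sketch (illustrative only: the real coverage is in the
hitobject_make.hlsl and maybereorder.hlsl tests added below, and the
MaybeReorderThread overload shown here is assumed from the SER specification
rather than spelled out in this diff):

    // Built as lib_6_9; raygeneration is one of the stages where
    // dx::HitObject is permitted.
    [shader("raygeneration")]
    void RayGenMain() {
      // Default construction lowers through MOP_DxHitObject_MakeNop.
      dx::HitObject hit;
      // Assumed single-HitObject overload, per hlsl-specs PR 277.
      dx::MaybeReorderThread(hit);
    }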
Specification PR: https://github.com/microsoft/hlsl-specs/pull/277 --- include/dxc/DXIL/DxilUtil.h | 2 + include/dxc/HlslIntrinsicOp.h | 4 +- include/dxc/dxcapi.internal.h | 5 +- lib/DXIL/DxilUtil.cpp | 21 + lib/HLSL/HLOperationLower.cpp | 24 + tools/clang/include/clang/AST/HlslTypes.h | 3 + tools/clang/include/clang/Basic/Attr.td | 8 + .../clang/Basic/DiagnosticSemaKinds.td | 9 + tools/clang/lib/AST/ASTContextHLSL.cpp | 52 +- tools/clang/lib/AST/HlslTypes.cpp | 4 + tools/clang/lib/CodeGen/CGHLSLMS.cpp | 8 +- tools/clang/lib/CodeGen/CodeGenTypes.cpp | 11 +- tools/clang/lib/Sema/SemaExpr.cpp | 11 +- tools/clang/lib/Sema/SemaHLSL.cpp | 443 +++++++++++++----- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 91 +++- tools/clang/lib/Sema/SemaOverload.cpp | 8 +- .../DXC/Passes/DxilGen/hitobject_dxilgen.ll | 101 ++++ .../Passes/DxilGen/maybereorder_dxilgen.ll | 106 +++++ .../objects/HitObject/hitobject_make.hlsl | 12 + .../objects/HitObject/hitobject_make_ast.hlsl | 24 + .../hlsl/objects/HitObject/maybereorder.hlsl | 13 + .../objects/HitObject/maybereorder_ast.hlsl | 28 ++ .../intrinsics/reorder/hitobject_reorder.hlsl | 10 + .../reorder/reorder-entry-errors.hlsl | 62 +++ .../reorder/reorder-unavailable-pre-sm69.hlsl | 9 + .../hlsl/namespace/dx-namespace-pre-sm69.hlsl | 8 + .../HitObject/hitobject-entry-errors.hlsl | 58 +++ .../HitObject/hitobject-in-buffer.hlsl | 4 + .../hitobject-unavailable-pre-sm69.hlsl | 11 + .../HitObject/hitobject-unsupported-vs.hlsl | 8 + .../HitObject/hitobject-using-namespace.hlsl | 36 ++ .../hitobject-without-namespace.hlsl | 39 ++ .../maybereorderthread-without-namespace.hlsl | 31 ++ utils/hct/gen_intrin_main.txt | 11 + utils/hct/hctdb.py | 39 +- utils/hct/hctdb_instrhelp.py | 12 +- utils/hct/hlsl_intrinsic_opcodes.json | 6 +- 37 files changed, 1174 insertions(+), 158 deletions(-) create mode 100644 tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll create mode 100644 tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index 490f335db5..5652c56f50 100644 --- 
a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -162,6 +162,8 @@ GetHLSLResourceProperties(llvm::Type *Ty); bool IsHLSLResourceType(llvm::Type *Ty); bool IsHLSLObjectType(llvm::Type *Ty); bool IsHLSLRayQueryType(llvm::Type *Ty); +llvm::Type *GetHLSLHitObjectType(llvm::Module *M); +bool IsHLSLHitObjectType(llvm::Type *Ty); bool IsHLSLResourceDescType(llvm::Type *Ty); bool IsResourceSingleComponent(llvm::Type *Ty); uint8_t GetResourceComponentCount(llvm::Type *Ty); diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 41c72d1a51..90f3fafd79 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -333,6 +333,8 @@ enum class IntrinsicOp { MOP_TraceRayInline = 325, MOP_WorldRayDirection = 326, MOP_WorldRayOrigin = 327, + MOP_DxHitObject_MakeNop = 358, + IOP_DxMaybeReorderThread = 359, MOP_Count = 328, MOP_FinishedCrossGroupSharing = 329, MOP_GetGroupNodeOutputRecords = 330, @@ -364,7 +366,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 358, + Num_Intrinsics = 360, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index 4b8e237201..bf8a040673 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -126,7 +126,9 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS = 49, LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS = 50, - LICOMPTYPE_COUNT = 51 + LICOMPTYPE_HIT_OBJECT = 51, + + LICOMPTYPE_COUNT = 52 }; static const BYTE IA_SPECIAL_BASE = 0xf0; @@ -164,6 +166,7 @@ struct HLSL_INTRINSIC_ARGUMENT { static const UINT INTRIN_FLAG_READ_ONLY = 1U << 0; static const UINT INTRIN_FLAG_READ_NONE = 1U << 1; static const UINT INTRIN_FLAG_IS_WAVE = 1U << 2; +static const UINT INTRIN_FLAG_STATIC_MEMBER = 1U << 3; struct HLSL_INTRINSIC { UINT Op; // Intrinsic Op ID diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 865fad487c..0a4fb1160a 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -574,6 +574,9 @@ bool IsHLSLObjectType(llvm::Type *Ty) { if (IsHLSLNodeIOType(Ty)) return true; + + if (IsHLSLHitObjectType(Ty)) + return true; } return false; } @@ -591,6 +594,24 @@ bool IsHLSLRayQueryType(llvm::Type *Ty) { return false; } +llvm::Type *GetHLSLHitObjectType(llvm::Module *M) { + using namespace llvm; + StructType *HitObjectTy = M->getTypeByName("dx.types.HitObject"); + if (!HitObjectTy) + HitObjectTy = StructType::create({Type::getInt8PtrTy(M->getContext(), 0)}, + "dx.types.HitObject", false); + return HitObjectTy; +} + +bool IsHLSLHitObjectType(llvm::Type *Ty) { + llvm::StructType *ST = dyn_cast(Ty); + if (!ST) + return false; + if (!ST->hasName()) + return false; + return ST->getName() == "dx.types.HitObject"; +} + bool IsHLSLResourceDescType(llvm::Type *Ty) { if (llvm::StructType *ST = dyn_cast(Ty)) { if (!ST->hasName()) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 5a0dadf7f4..3ab1f9fdec 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6062,6 +6062,24 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // namespace +// Shader Execution Reordering. 
+namespace { +Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, + OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return nullptr; // TODO: Merge SER DXIL patches +} +} // namespace + // Resource Handle. namespace { Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP, @@ -6794,6 +6812,12 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, + DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil + // opcode + {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, + DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil + // opcode }; } // namespace static_assert( diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index e6a50de8fb..3b517576fe 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -391,6 +391,7 @@ clang::CXXRecordDecl * DeclareConstantBufferViewType(clang::ASTContext &context, clang::InheritableAttr *Attr); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); +clang::CXXRecordDecl *DeclareHitObjectType(clang::NamespaceDecl &NSDecl); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); @@ -472,6 +473,7 @@ bool IsHLSLNodeInputType(clang::QualType type); bool IsHLSLDynamicResourceType(clang::QualType type); bool IsHLSLDynamicSamplerType(clang::QualType type); bool IsHLSLNodeType(clang::QualType type); +bool IsHLSLHitObjectType(clang::QualType type); bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type); bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type); @@ -545,6 +547,7 @@ clang::CXXMethodDecl *CreateObjectFunctionDeclarationWithParams( clang::QualType resultType, llvm::ArrayRef paramTypes, llvm::ArrayRef paramNames, clang::DeclarationName declarationName, bool isConst, + clang::StorageClass SC = clang::StorageClass::SC_None, bool isTemplateFunction = false); DXIL::ResourceClass GetResourceClassForType(const clang::ASTContext &context, diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 3afbaa91c7..48193f7077 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -1157,6 +1157,14 @@ def HLSLRayQueryObject : InheritableAttr { let Documentation = [Undocumented]; } +// HLSL HitObject Attribute + +def HLSLHitObject : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + // HLSL Parameter Attributes def HLSLMaxRecords : InheritableAttr { diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 16ff7777a7..6ae59cac14 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7665,6 +7665,9 @@ def err_hlsl_unsupported_builtin_op: Error< def warn_hlsl_builtin_constant_unavailable: Warning< "potential misuse of built-in constant %0 in shader model %1; introduced" " in shader model %2">, InGroup; +def warn_hlsl_builtin_type_unavailable: Warning< + "potential misuse of built-in type %0 in shader model %1; introduced" + " in shader model %2">, DefaultError, InGroup; def err_hlsl_unsupported_char_literal : Error< "unsupported style of char literal - use a single-character char-based literal">; def err_hlsl_unsupported_clipplane_argument_expression : Error< @@ -7991,6 +7994,12 @@ def warn_hlsl_legacy_integer_literal_signedness: Warning< InGroup, DefaultIgnore; def err_hlsl_unsupported_semantic_index: Error< "'%0' is defined with semantic index %1, but only values 0 through %2 are supported">; + +// Shader Execution Reordering +def err_hlsl_reorder_unsupported_stage : Error< + "dx::MaybeReorderThread is unavailable in shader stage '%0' (requires 'raygeneration')">; +def err_hlsl_hitobject_unsupported_stage : Error< + "dx::HitObject is unavailable in shader stage '%0' (requires 'raygeneration', 'closesthit' or 'miss')">; // HLSL Change Ends // SPIRV Change Starts diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 1b6c346acd..dcd3e89e9a 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -23,6 +23,7 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/HlslBuiltinTypeDeclBuilder.h" #include "clang/AST/TypeLoc.h" +#include "clang/Basic/Specifiers.h" #include "clang/Sema/Overload.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -1070,7 +1071,7 @@ static void CreateConstructorDeclaration( static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, - CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { + StorageClass SC, CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(functionDecl != nullptr); @@ -1082,8 +1083,8 @@ static void CreateObjectFunctionDeclaration( *tinfo = context.getTrivialTypeSourceInfo(functionQT, NoLoc); DXASSERT_NOMSG(*tinfo != nullptr); *functionDecl = CXXMethodDecl::Create( - context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, - StorageClass::SC_None, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, SC, + InlineSpecifiedFalse, IsConstexprFalse, NoLoc); DXASSERT_NOMSG(*functionDecl != nullptr); (*functionDecl)->setLexicalDeclContext(recordDecl); (*functionDecl)->setAccess(AccessSpecifier::AS_public); @@ -1092,7 +1093,8 @@ static void CreateObjectFunctionDeclaration( CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef paramTypes, ArrayRef paramNames, - DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DeclarationName declarationName, 
bool isConst, StorageClass SC, + bool isTemplateFunction) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(!resultType.isNull()); DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); @@ -1100,7 +1102,7 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( TypeSourceInfo *tinfo; CXXMethodDecl *functionDecl; CreateObjectFunctionDeclaration(context, recordDecl, resultType, paramTypes, - declarationName, isConst, &functionDecl, + declarationName, isConst, SC, &functionDecl, &tinfo); // Create and associate parameters to method. @@ -1215,6 +1217,46 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { return typeDeclBuilder.getRecordDecl(); } +CXXRecordDecl *hlsl::DeclareHitObjectType(NamespaceDecl &NSDecl) { + ASTContext &Context = NSDecl.getASTContext(); + // HitObject { ... } + BuiltinTypeDeclBuilder TypeDeclBuilder(&NSDecl, "HitObject"); + TypeDeclBuilder.startDefinition(); + + // Add handle to mark as HLSL object. + TypeDeclBuilder.addField("h", GetHLSLObjectHandleType(Context)); + CXXRecordDecl *RecordDecl = TypeDeclBuilder.getRecordDecl(); + + CanQualType canQualType = Context.getCanonicalType( + Context.getRecordType(TypeDeclBuilder.getRecordDecl())); + + // Add constructor that will be lowered to MOP_HitObject_MakeNop. + CXXConstructorDecl *pConstructorDecl = nullptr; + TypeSourceInfo *pTypeSourceInfo = nullptr; + CreateConstructorDeclaration( + Context, RecordDecl, Context.VoidTy, {}, + Context.DeclarationNames.getCXXConstructorName(canQualType), false, + &pConstructorDecl, &pTypeSourceInfo); + RecordDecl->addDecl(pConstructorDecl); + pConstructorDecl->addAttr(HLSLIntrinsicAttr::CreateImplicit( + Context, "op", "", + static_cast(hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop))); + pConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(Context)); + + // Add AvailabilityAttribute for SM6.9+ + VersionTuple VT69 = VersionTuple(6, 9); + RecordDecl->addAttr(ConstructAvailabilityAttribute(Context, VT69)); + + // Add the implicit HLSLHitObjectAttr attribute to unambiguously recognize the + // builtin HitObject type. + RecordDecl->addAttr(HLSLHitObjectAttr::CreateImplicit(Context)); + RecordDecl->setImplicit(true); + + // Add to namespace + RecordDecl->setDeclContext(&NSDecl); + return RecordDecl; +} + CXXRecordDecl *hlsl::DeclareResourceType(ASTContext &context, bool bSampler) { // struct ResourceDescriptor { uint8 desc; } StringRef Name = bSampler ? ".Sampler" : ".Resource"; diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 630e969881..8f9460ce63 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -507,6 +507,10 @@ bool IsHLSLResourceType(clang::QualType type) { return false; } +bool IsHLSLHitObjectType(QualType type) { + return nullptr != getAttr(type); +} + DXIL::NodeIOKind GetNodeIOType(clang::QualType type) { if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return Attr->getNodeIOType(); diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 29ed954425..b041db95a7 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -2500,9 +2500,11 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { // Type annotation for this pointer. 
if (const CXXMethodDecl *MFD = dyn_cast(FD)) { - const CXXRecordDecl *RD = MFD->getParent(); - QualType Ty = CGM.getContext().getTypeDeclType(RD); - AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + if (!MFD->isStatic()) { + const CXXRecordDecl *RD = MFD->getParent(); + QualType Ty = CGM.getContext().getTypeDeclType(RD); + AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + } } for (const ValueDecl *param : FD->params()) { diff --git a/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/tools/clang/lib/CodeGen/CodeGenTypes.cpp index d11575d359..82328c8fb5 100644 --- a/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -14,21 +14,23 @@ #include "CodeGenTypes.h" #include "CGCXXABI.h" #include "CGCall.h" +#include "CGHLSLRuntime.h" // HLSL Change #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenModule.h" // HLSL Change #include "TargetInfo.h" +#include "dxc/DXIL/DxilUtil.h" // HLSL Change #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" -#include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" // HLSL Change - clang-format #include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" // HLSL Change #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" -#include "CodeGenModule.h" // HLSL Change -#include "CGHLSLRuntime.h" // HLSL Change using namespace clang; using namespace CodeGen; @@ -365,7 +367,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { .getConstantArrayType(eltTy, llvm::APInt(32, count), ArrayType::ArraySizeModifier::Normal, 0) .getTypePtr(); - } + } else if (hlsl::IsHLSLHitObjectType(T)) // HLSL Change + return hlsl::dxilutil::GetHLSLHitObjectType(&TheModule); else return ConvertRecordDeclType(RT->getDecl()); } diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index c8c762a0a1..507b6a7508 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -2787,13 +2787,18 @@ bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, // Never if a scope specifier was provided. if (SS.isSet()) { // HLSL Change begins - // We want to be able to have intrinsics inside the "vk" namespace. + // We want to be able to have intrinsics inside the "vk" and "dx" + // namespaces. 
const bool isVkNamespace = SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && SS.getScopeRep()->getAsNamespace()->getName() == "vk"; - if (!isVkNamespace) - // HLSL Change ends + const bool isDxNamespace = + SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && + SS.getScopeRep()->getAsNamespace()->getName() == "dx"; + + if (!isVkNamespace && !isDxNamespace) + // HLSL Change ends return false; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 66cbea12ce..40010b1596 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -14,6 +14,7 @@ #include "VkConstantsTables.h" #include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" @@ -31,6 +32,8 @@ #include "clang/AST/HlslTypes.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/Specifiers.h" +#include "clang/Parse/ParseDiagnostic.h" #include "clang/Sema/ExternalSemaSource.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" @@ -40,6 +43,7 @@ #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -243,6 +247,9 @@ enum ArBasicKind { AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT, + AR_BASIC_MAXIMUM_COUNT }; @@ -593,6 +600,9 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + LICOMPTYPE_HIT_OBJECT, // AR_OBJECT_HIT_OBJECT, + // AR_BASIC_MAXIMUM_COUNT }; @@ -1218,6 +1228,10 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_BASIC_UNKNOWN}; +// Shader Execution Reordering +static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, + AR_BASIC_UNKNOWN}; + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1272,6 +1286,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_AnyOutputRecordCT, // LICOMPTYPE_ANY_NODE_OUTPUT_RECORD g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS + g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1360,7 +1375,10 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_NODE_OUTPUT, AR_OBJECT_EMPTY_NODE_OUTPUT, AR_OBJECT_NODE_OUTPUT_ARRAY, AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY, - AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS}; + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT}; // Count of template arguments for basic kind of objects that look like // templates (one or more type arguments). 
@@ -1476,6 +1494,9 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, 1, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + 0, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == @@ -1622,76 +1643,176 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {1, MipsFalse, SampleFalse}, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS {1, MipsFalse, SampleFalse}, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + {0, MipsFalse, SampleFalse}, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsSubscripts)); // Type names for ArBasicKind values. static const char *g_ArBasicTypeNames[] = { - "bool", "float", "half", "half", "float", "double", "int", "sbyte", "byte", - "short", "ushort", "int", "uint", "long", "ulong", "min10float", - "min16float", "min12int", "min16int", "min16uint", "int8_t4_packed", - "uint8_t4_packed", "enum", - - "", "", "", "", "", "", + "bool", + "float", + "half", + "half", + "float", + "double", + "int", + "sbyte", + "byte", + "short", + "ushort", + "int", + "uint", + "long", + "ulong", + "min10float", + "min16float", + "min12int", + "min16int", + "min16uint", + "int8_t4_packed", + "uint8_t4_packed", + "enum", + + "", + "", + "", + "", + "", + "", "enum class", - "null", "literal string", "string", + "null", + "literal string", + "string", // "texture", - "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", "Texture3D", - "TextureCube", "TextureCubeArray", "Texture2DMS", "Texture2DMSArray", - "SamplerState", "sampler1D", "sampler2D", "sampler3D", "samplerCUBE", - "SamplerComparisonState", "Buffer", "RenderTargetView", "DepthStencilView", - "ComputeShader", "DomainShader", "GeometryShader", "HullShader", - "PixelShader", "VertexShader", "pixelfragment", "vertexfragment", - "StateBlock", "Rasterizer", "DepthStencil", "Blend", "PointStream", - "LineStream", "TriangleStream", "InputPatch", "OutputPatch", "RWTexture1D", - "RWTexture1DArray", "RWTexture2D", "RWTexture2DArray", "RWTexture3D", - "RWBuffer", "ByteAddressBuffer", "RWByteAddressBuffer", "StructuredBuffer", - "RWStructuredBuffer", "RWStructuredBuffer(Incrementable)", - "RWStructuredBuffer(Decrementable)", "AppendStructuredBuffer", + "Texture1D", + "Texture1DArray", + "Texture2D", + "Texture2DArray", + "Texture3D", + "TextureCube", + "TextureCubeArray", + "Texture2DMS", + "Texture2DMSArray", + "SamplerState", + "sampler1D", + "sampler2D", + "sampler3D", + "samplerCUBE", + "SamplerComparisonState", + "Buffer", + "RenderTargetView", + "DepthStencilView", + "ComputeShader", + "DomainShader", + "GeometryShader", + "HullShader", + "PixelShader", + "VertexShader", + "pixelfragment", + "vertexfragment", + "StateBlock", + "Rasterizer", + "DepthStencil", + "Blend", + "PointStream", + "LineStream", + "TriangleStream", + "InputPatch", + "OutputPatch", + "RWTexture1D", + "RWTexture1DArray", + "RWTexture2D", + "RWTexture2DArray", + "RWTexture3D", + "RWBuffer", + "ByteAddressBuffer", + "RWByteAddressBuffer", + "StructuredBuffer", + "RWStructuredBuffer", + "RWStructuredBuffer(Incrementable)", + "RWStructuredBuffer(Decrementable)", + "AppendStructuredBuffer", "ConsumeStructuredBuffer", - "ConstantBuffer", "TextureBuffer", + "ConstantBuffer", + "TextureBuffer", - "RasterizerOrderedBuffer", "RasterizerOrderedByteAddressBuffer", - "RasterizerOrderedStructuredBuffer", "RasterizerOrderedTexture1D", - "RasterizerOrderedTexture1DArray", 
"RasterizerOrderedTexture2D", - "RasterizerOrderedTexture2DArray", "RasterizerOrderedTexture3D", + "RasterizerOrderedBuffer", + "RasterizerOrderedByteAddressBuffer", + "RasterizerOrderedStructuredBuffer", + "RasterizerOrderedTexture1D", + "RasterizerOrderedTexture1DArray", + "RasterizerOrderedTexture2D", + "RasterizerOrderedTexture2DArray", + "RasterizerOrderedTexture3D", - "FeedbackTexture2D", "FeedbackTexture2DArray", + "FeedbackTexture2D", + "FeedbackTexture2DArray", // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN - "SubpassInput", "SubpassInputMS", "SpirvType", "SpirvOpaqueType", - "integral_constant", "Literal", "ext_type", "ext_result_id", + "SubpassInput", + "SubpassInputMS", + "SpirvType", + "SpirvOpaqueType", + "integral_constant", + "Literal", + "ext_type", + "ext_result_id", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends "", - "deprecated effect object", "wave_t", "RayDesc", - "RaytracingAccelerationStructure", "user defined type", + "deprecated effect object", + "wave_t", + "RayDesc", + "RaytracingAccelerationStructure", + "user defined type", "BuiltInTriangleIntersectionAttributes", // subobjects - "StateObjectConfig", "GlobalRootSignature", "LocalRootSignature", - "SubobjectToExportsAssociation", "RaytracingShaderConfig", - "RaytracingPipelineConfig", "TriangleHitGroup", - "ProceduralPrimitiveHitGroup", "RaytracingPipelineConfig1", - - "RayQuery", "HEAP_Resource", "HEAP_Sampler", - - "RWTexture2DMS", "RWTexture2DMSArray", + "StateObjectConfig", + "GlobalRootSignature", + "LocalRootSignature", + "SubobjectToExportsAssociation", + "RaytracingShaderConfig", + "RaytracingPipelineConfig", + "TriangleHitGroup", + "ProceduralPrimitiveHitGroup", + "RaytracingPipelineConfig1", + + "RayQuery", + "HEAP_Resource", + "HEAP_Sampler", + + "RWTexture2DMS", + "RWTexture2DMSArray", // Workgraphs - "EmptyNodeInput", "DispatchNodeInputRecord", "RWDispatchNodeInputRecord", - "GroupNodeInputRecords", "RWGroupNodeInputRecords", "ThreadNodeInputRecord", + "EmptyNodeInput", + "DispatchNodeInputRecord", + "RWDispatchNodeInputRecord", + "GroupNodeInputRecords", + "RWGroupNodeInputRecords", + "ThreadNodeInputRecord", "RWThreadNodeInputRecord", - "NodeOutput", "EmptyNodeOutput", "NodeOutputArray", "EmptyNodeOutputArray", + "NodeOutput", + "EmptyNodeOutput", + "NodeOutputArray", + "EmptyNodeOutputArray", - "ThreadNodeOutputRecords", "GroupNodeOutputRecords"}; + "ThreadNodeOutputRecords", + "GroupNodeOutputRecords", + + // Shader Execution Reordering + "HitObject", +}; C_ASSERT(_countof(g_ArBasicTypeNames) == AR_BASIC_MAXIMUM_COUNT); @@ -1731,6 +1852,10 @@ static const char *g_DeprecatedEffectObjectNames[] = { "RenderTargetView", // 16 }; +static bool IsStaticMember(const HLSL_INTRINSIC *fn) { + return fn->Flags & INTRIN_FLAG_STATIC_MEMBER; +} + static bool IsVariadicIntrinsicFunction(const HLSL_INTRINSIC *fn) { return fn->pArgs[fn->uNumArgs - 1].uTemplateId == INTRIN_TEMPLATE_VARARGS; } @@ -1816,15 +1941,13 @@ static void AddHLSLIntrinsicAttr(FunctionDecl *FD, ASTContext &context, FD->addAttr(PureAttr::CreateImplicit(context)); if (pIntrinsic->Flags & INTRIN_FLAG_IS_WAVE) FD->addAttr(HLSLWaveSensitiveAttr::CreateImplicit(context)); - // TBD: Add availability attribute if MinShaderModel is set. 
- // if (pIntrinsic->MinShaderModel) { - // unsigned Major = pIntrinsic->MinShaderModel >> 4; - // unsigned Minor = pIntrinsic->MinShaderModel & 0xF; - // FD->addAttr(AvailabilityAttr::CreateImplicit( - // context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), - // clang::VersionTuple(), clang::VersionTuple(), false, - // "HLSL Intrinsic availability limited by shader model.")); - //} + if (pIntrinsic->MinShaderModel) { + unsigned Major = pIntrinsic->MinShaderModel >> 4; + unsigned Minor = pIntrinsic->MinShaderModel & 0xF; + FD->addAttr(AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), + clang::VersionTuple(), clang::VersionTuple(), false, "")); + } } static FunctionDecl * @@ -1870,12 +1993,14 @@ AddHLSLIntrinsicFunction(ASTContext &context, NamespaceDecl *NS, const QualType fnReturnType = functionArgQualTypes[0]; std::vector fnArgTypes(functionArgQualTypes.begin() + 1, functionArgQualTypes.end()); + + StorageClass SC = IsStaticMember(pIntrinsic) ? SC_Static : SC_Extern; QualType functionType = context.getFunctionType(fnReturnType, fnArgTypes, protoInfo, paramMods); FunctionDecl *functionDecl = FunctionDecl::Create( context, currentDeclContext, NoLoc, - DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, - StorageClass::SC_Extern, InlineSpecifiedFalse, HasWrittenPrototypeTrue); + DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, SC, + InlineSpecifiedFalse, HasWrittenPrototypeTrue); currentDeclContext->addDecl(functionDecl); functionDecl->setLexicalDeclContext(currentDeclContext); @@ -2284,6 +2409,10 @@ static void GetIntrinsicMethods(ArBasicKind kind, *intrinsics = g_RayQueryMethods; *intrinsicCount = _countof(g_RayQueryMethods); break; + case AR_OBJECT_HIT_OBJECT: + *intrinsics = g_DxHitObjectMethods; + *intrinsicCount = _countof(g_DxHitObjectMethods); + break; case AR_OBJECT_RWTEXTURE2DMS: *intrinsics = g_RWTexture2DMSMethods; *intrinsicCount = _countof(g_RWTexture2DMSMethods); @@ -2846,6 +2975,9 @@ class HLSLExternalSource : public ExternalSemaSource { // Namespace decl for Vulkan-specific intrinsic functions NamespaceDecl *m_vkNSDecl; + // Namespace decl for dx intrinsic functions + NamespaceDecl *m_dxNSDecl; + // Context being processed. ASTContext *m_context; @@ -3063,10 +3195,13 @@ class HLSLExternalSource : public ExternalSemaSource { IdentifierInfo *ii = &m_context->Idents.get(StringRef(intrinsic->pArgs[0].pName)); DeclarationName declarationName = DeclarationName(ii); + + StorageClass SC = IsStaticMember(intrinsic) ? 
SC_Static : SC_None; + CXXMethodDecl *functionDecl = CreateObjectFunctionDeclarationWithParams( *m_context, recordDecl, functionResultQT, ArrayRef(argsQTs, numParams), - ArrayRef(argNames, numParams), declarationName, true, + ArrayRef(argNames, numParams), declarationName, true, SC, templateParamNamedDeclsCount > 0); functionDecl->setImplicit(true); @@ -3268,7 +3403,7 @@ class HLSLExternalSource : public ExternalSemaSource { *m_context, recordDecl, resultType, ArrayRef(indexType), ArrayRef(StringRef("index")), m_context->DeclarationNames.getCXXOperatorName(OO_Subscript), true, - true); + StorageClass::SC_None, true); hlsl::CreateFunctionTemplateDecl( *m_context, recordDecl, functionDecl, reinterpret_cast(&templateTypeParmDecl), 1); @@ -3312,9 +3447,8 @@ class HLSLExternalSource : public ExternalSemaSource { return -1; } -#ifdef ENABLE_SPIRV_CODEGEN - SmallVector CreateTemplateTypeParmDeclsForVkIntrinsicFunction( - const HLSL_INTRINSIC *intrinsic) { + SmallVector CreateTemplateTypeParmDeclsForIntrinsicFunction( + const HLSL_INTRINSIC *intrinsic, NamespaceDecl *nsDecl) { SmallVector templateTypeParmDecls; auto &context = m_sema->getASTContext(); const HLSL_INTRINSIC_ARGUMENT *pArgs = intrinsic->pArgs; @@ -3325,9 +3459,8 @@ class HLSLExternalSource : public ExternalSemaSource { pArgs[i].uLegalTemplates == LITEMPLATE_ANY) { IdentifierInfo *id = &context.Idents.get("T"); TemplateTypeParmDecl *templateTypeParmDecl = - TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, - 0, id, TypenameTrue, - ParameterPackFalse); + TemplateTypeParmDecl::Create(context, nsDecl, NoLoc, NoLoc, 0, 0, + id, TypenameTrue, ParameterPackFalse); if (TInfo == nullptr) { TInfo = m_sema->getASTContext().CreateTypeSourceInfo( m_context->UnsignedIntTy, 0); @@ -3341,7 +3474,7 @@ class HLSLExternalSource : public ExternalSemaSource { } SmallVector - CreateParmDeclsForVkIntrinsicFunction( + CreateParmDeclsForIntrinsicFunction( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl ¶mTypes, const SmallVectorImpl ¶mMods) { @@ -3366,7 +3499,7 @@ class HLSLExternalSource : public ExternalSemaSource { return paramDecls; } - SmallVector VkIntrinsicFunctionParamTypes( + SmallVector getIntrinsicFunctionParamTypes( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl &templateTypeParmDecls) { auto &context = m_sema->getASTContext(); @@ -3401,8 +3534,11 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_VOID: paramTypes.push_back(context.VoidTy); break; + case LICOMPTYPE_HIT_OBJECT: + paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); + break; default: - DXASSERT(false, "Argument type of vk:: intrinsic function is not " + DXASSERT(false, "Argument type of intrinsic function is not " "supported"); break; } @@ -3410,9 +3546,9 @@ class HLSLExternalSource : public ExternalSemaSource { return paramTypes; } - QualType - VkIntrinsicFunctionType(const SmallVectorImpl ¶mTypes, - const SmallVectorImpl ¶mMods) { + QualType getIntrinsicFunctionType( + const SmallVectorImpl ¶mTypes, + const SmallVectorImpl ¶mMods) { DXASSERT(!paramTypes.empty(), "Given param type vector is empty"); ArrayRef params({}); @@ -3425,7 +3561,7 @@ class HLSLExternalSource : public ExternalSemaSource { EmptyEPI, paramMods); } - void SetParmDeclsForVkIntrinsicFunction( + void SetParmDeclsForIntrinsicFunction( TypeSourceInfo *TInfo, FunctionDecl *functionDecl, const SmallVectorImpl ¶mDecls) { FunctionProtoTypeLoc Proto = @@ -3440,47 +3576,39 @@ class HLSLExternalSource : public ExternalSemaSource { 
functionDecl->setParams(paramDecls); } - // Adds intrinsic function declarations to the "vk" namespace. - // It does so only if SPIR-V code generation is being done. - // Assumes the implicit "vk" namespace has already been created. - void AddVkIntrinsicFunctions() { - // If not doing SPIR-V CodeGen, return. - if (!m_sema->getLangOpts().SPIRV) - return; - - DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); - + void AddIntrinsicFunctionsToNamespace(const HLSL_INTRINSIC *table, + uint32_t tableSize, + NamespaceDecl *nsDecl) { auto &context = m_sema->getASTContext(); - for (uint32_t i = 0; i < _countof(g_VkIntrinsics); ++i) { - const HLSL_INTRINSIC *intrinsic = &g_VkIntrinsics[i]; + for (uint32_t i = 0; i < tableSize; ++i) { + const HLSL_INTRINSIC *intrinsic = &table[i]; const IdentifierInfo &fnII = context.Idents.get( intrinsic->pArgs->pName, tok::TokenKind::identifier); DeclarationName functionName(&fnII); // Create TemplateTypeParmDecl. SmallVector templateTypeParmDecls = - CreateTemplateTypeParmDeclsForVkIntrinsicFunction(intrinsic); + CreateTemplateTypeParmDeclsForIntrinsicFunction(intrinsic, nsDecl); // Get types for parameters. SmallVector paramTypes = - VkIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); + getIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); SmallVector paramMods; InitParamMods(intrinsic, paramMods); // Create FunctionDecl. - QualType fnType = VkIntrinsicFunctionType(paramTypes, paramMods); + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; + QualType fnType = getIntrinsicFunctionType(paramTypes, paramMods); TypeSourceInfo *TInfo = m_sema->getASTContext().CreateTypeSourceInfo(fnType, 0); FunctionDecl *functionDecl = FunctionDecl::Create( - context, m_vkNSDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), - fnType, TInfo, StorageClass::SC_Extern, InlineSpecifiedFalse, - HasWrittenPrototypeTrue); + context, nsDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), + fnType, TInfo, SC, InlineSpecifiedFalse, HasWrittenPrototypeTrue); // Create and set ParmVarDecl. 
SmallVector paramDecls = - CreateParmDeclsForVkIntrinsicFunction(intrinsic, paramTypes, - paramMods); - SetParmDeclsForVkIntrinsicFunction(TInfo, functionDecl, paramDecls); + CreateParmDeclsForIntrinsicFunction(intrinsic, paramTypes, paramMods); + SetParmDeclsForIntrinsicFunction(TInfo, functionDecl, paramDecls); if (!templateTypeParmDecls.empty()) { TemplateParameterList *templateParmList = TemplateParameterList::Create( @@ -3488,22 +3616,52 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.size(), NoLoc); functionDecl->setTemplateParameterListsInfo(context, 1, &templateParmList); - FunctionTemplateDecl *functionTemplate = FunctionTemplateDecl::Create( - context, m_vkNSDecl, NoLoc, functionName, templateParmList, - functionDecl); + FunctionTemplateDecl *functionTemplate = + FunctionTemplateDecl::Create(context, nsDecl, NoLoc, functionName, + templateParmList, functionDecl); functionDecl->setDescribedFunctionTemplate(functionTemplate); - m_vkNSDecl->addDecl(functionTemplate); - functionTemplate->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionTemplate); + functionTemplate->setDeclContext(nsDecl); } else { - m_vkNSDecl->addDecl(functionDecl); - functionDecl->setLexicalDeclContext(m_vkNSDecl); - functionDecl->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionDecl); + functionDecl->setLexicalDeclContext(nsDecl); + functionDecl->setDeclContext(nsDecl); } functionDecl->setImplicit(true); } } + // Adds intrinsic function declarations to the "dx" namespace. + // Assumes the implicit "vk" namespace has already been created. + void AddDxIntrinsicFunctions() { + DXASSERT(m_dxNSDecl, "caller has not created the dx namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_DxIntrinsics, _countof(g_DxIntrinsics), + m_dxNSDecl); + // Eagerly declare HitObject methods. This is required to make lookup of + // 'static' HLSL member functions work without special-casing HLSL scope + // lookup. + CXXRecordDecl *HitObjectDecl = + GetBasicKindType(AR_OBJECT_HIT_OBJECT)->getAsCXXRecordDecl(); + CompleteType(HitObjectDecl); + } + +#ifdef ENABLE_SPIRV_CODEGEN + // Adds intrinsic function declarations to the "vk" namespace. + // It does so only if SPIR-V code generation is being done. + // Assumes the implicit "vk" namespace has already been created. + void AddVkIntrinsicFunctions() { + // If not doing SPIR-V CodeGen, return. + if (!m_sema->getLangOpts().SPIRV) + return; + + DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_VkIntrinsics, _countof(g_VkIntrinsics), + m_vkNSDecl); + } + // Adds implicitly defined Vulkan-specific constants to the "vk" namespace. // It does so only if SPIR-V code generation is being done. // Assumes the implicit "vk" namespace has already been created. @@ -3619,6 +3777,10 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_RAY_QUERY) { recordDecl = DeclareRayQueryType(*m_context); + } else if (kind == AR_OBJECT_HIT_OBJECT) { + // Declare 'HitObject' in '::dx' extension namespace. 
+ DXASSERT(m_dxNSDecl, "namespace ::dx must be declared in SM6.9+"); + recordDecl = DeclareHitObjectType(*m_dxNSDecl); } else if (kind == AR_OBJECT_HEAP_RESOURCE) { recordDecl = DeclareResourceType(*m_context, /*bSampler*/ false); if (SM->IsSM66Plus()) { @@ -3866,8 +4028,8 @@ class HLSLExternalSource : public ExternalSemaSource { : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), - m_vkNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), - m_hlslStringTypedef(nullptr) { + m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), + m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); memset(m_matrixShorthandTypes, 0, sizeof(m_matrixShorthandTypes)); memset(m_vectorTypes, 0, sizeof(m_vectorTypes)); @@ -3896,6 +4058,14 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema = &S; S.addExternalSource(this); + m_dxNSDecl = + NamespaceDecl::Create(context, context.getTranslationUnitDecl(), + /*Inline*/ false, SourceLocation(), + SourceLocation(), &context.Idents.get("dx"), + /*PrevDecl*/ nullptr); + m_dxNSDecl->setImplicit(); + context.getTranslationUnitDecl()->addDecl(m_dxNSDecl); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Create the "vk" namespace which contains Vulkan-specific intrinsics. @@ -3914,6 +4084,8 @@ class HLSLExternalSource : public ExternalSemaSource { AddIntrinsicTableMethods(intrinsic); } + AddDxIntrinsicFunctions(); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Add Vulkan-specific intrinsics. @@ -4596,6 +4768,7 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_WAVE: case AR_OBJECT_ACCELERATION_STRUCT: case AR_OBJECT_RAY_DESC: + case AR_OBJECT_HIT_OBJECT: case AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES: case AR_OBJECT_RWTEXTURE2DMS: case AR_OBJECT_RWTEXTURE2DMS_ARRAY: @@ -4919,12 +5092,18 @@ class HLSLExternalSource : public ExternalSemaSource { ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; + const bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + // Intrinsics live in the global namespace, so references to their names // should be either unqualified or '::'-prefixed. - // Exception: Vulkan-specific intrinsics live in the 'vk::' namespace. - if (isQualified && !isGlobalNamespace && !isVkNamespace) { + // Exceptions: + // - Vulkan-specific intrinsics live in the 'vk::' namespace. + // - DirectX-specific intrinsics live in the 'dx::' namespace. 
+ if (isQualified && !isGlobalNamespace && !isVkNamespace && !isDxNamespace) return false; - } const DeclarationNameInfo declName = ULE->getNameInfo(); IdentifierInfo *idInfo = declName.getName().getAsIdentifierInfo(); @@ -4935,6 +5114,10 @@ class HLSLExternalSource : public ExternalSemaSource { StringRef nameIdentifier = idInfo->getName(); const HLSL_INTRINSIC *table = g_Intrinsics; auto tableCount = _countof(g_Intrinsics); + if (isDxNamespace) { + table = g_DxIntrinsics; + tableCount = _countof(g_DxIntrinsics); + } #ifdef ENABLE_SPIRV_CODEGEN if (isVkNamespace) { table = g_VkIntrinsics; @@ -4971,11 +5154,16 @@ class HLSLExternalSource : public ExternalSemaSource { m_usedIntrinsics.insert(UsedIntrinsic(pIntrinsic, functionArgTypes)); bool insertedNewValue = insertResult.second; if (insertedNewValue) { + NamespaceDecl *nsDecl = m_hlslNSDecl; + if (isVkNamespace) + nsDecl = m_vkNSDecl; + else if (isDxNamespace) + nsDecl = m_dxNSDecl; DXASSERT(tableName, "otherwise IDxcIntrinsicTable::GetTableName() failed"); - intrinsicFuncDecl = AddHLSLIntrinsicFunction( - *m_context, isVkNamespace ? m_vkNSDecl : m_hlslNSDecl, tableName, - lowering, pIntrinsic, &functionArgTypes); + intrinsicFuncDecl = + AddHLSLIntrinsicFunction(*m_context, nsDecl, tableName, lowering, + pIntrinsic, &functionArgTypes); insertResult.first->setFunctionDecl(intrinsicFuncDecl); } else { intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); @@ -5742,11 +5930,12 @@ class HLSLExternalSource : public ExternalSemaSource { Params.push_back(paramDecl); } + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; QualType T = TInfo->getType(); DeclarationNameInfo NameInfo(FunctionTemplate->getDeclName(), NoLoc); CXXMethodDecl *method = CXXMethodDecl::Create( *m_context, dyn_cast(owner), NoLoc, NameInfo, T, TInfo, - SC_Extern, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + SC, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); // Add intrinsic attr AddHLSLIntrinsicAttr(method, *m_context, tableName, lowering, intrinsic); @@ -8007,7 +8196,8 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( DXASSERT_NOMSG(initSequence != nullptr); // In HLSL there are no default initializers, eg float4x4 m(); - // Except for RayQuery constructor (also handle InitializationKind::IK_Value) + // Except for RayQuery and HitObject constructors (also handle + // InitializationKind::IK_Value) if (Kind.getKind() == InitializationKind::IK_Default || Kind.getKind() == InitializationKind::IK_Value) { QualType destBaseType = m_context->getBaseElementType(Entity.getType()); @@ -8018,7 +8208,9 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( GetRecordDeclForBuiltInOrStruct(typeRecordDecl)); DXASSERT(index != -1, "otherwise can't find type we already determined was an object"); - if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY) { + + if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY || + g_ArBasicKindsAsTypes[index] == AR_OBJECT_HIT_OBJECT) { CXXConstructorDecl *Constructor = *typeRecordDecl->ctor_begin(); initSequence->AddConstructorInitializationStep( Constructor, AccessSpecifier::AS_public, destBaseType, false, false, @@ -11650,6 +11842,35 @@ static bool isStringLiteral(QualType type) { return eType->isSpecificBuiltinType(BuiltinType::Char_S); } +static void DiagnoseReachableSERCall(Sema &S, CallExpr *CE, + DXIL::ShaderKind EntrySK, + const FunctionDecl *EntryDecl, + bool IsReorderOperation) { + bool ValidEntry = false; + switch (EntrySK) { + default: + break; + case DXIL::ShaderKind::ClosestHit: + case 
DXIL::ShaderKind::Miss: + ValidEntry = !IsReorderOperation; + break; + case DXIL::ShaderKind::RayGeneration: + ValidEntry = true; + break; + } + + if (ValidEntry) + return; + + int DiagID = IsReorderOperation ? diag::err_hlsl_reorder_unsupported_stage + : diag::err_hlsl_hitobject_unsupported_stage; + + SourceLocation EntryLoc = EntryDecl->getLocation(); + SourceLocation Loc = CE->getExprLoc(); + S.Diag(Loc, DiagID) << ShaderModel::FullNameFromKind(EntrySK); + S.Diag(EntryLoc, diag::note_hlsl_entry_defined_here); +} + // Check HLSL member call constraints for used functions. // locallyVisited is true if this call has been visited already from any other // entry function. Used to avoid duplicate diagnostics when not dependent on @@ -11690,6 +11911,12 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, case hlsl::IntrinsicOp::MOP_TraceRayInline: DiagnoseTraceRayInline(*this, CE); break; + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); + break; + case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, true); + break; default: break; } diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index 827798a852..ed727af149 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -9,6 +9,7 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" #include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" @@ -16,12 +17,16 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/TypeLoc.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaHLSL.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -334,17 +339,19 @@ ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, return nullptr; } -class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor - : public RecursiveASTVisitor { +class HLSLReachableDiagnoseVisitor + : public RecursiveASTVisitor { public: - explicit HLSLCallDiagnoseVisitor( + explicit HLSLReachableDiagnoseVisitor( Sema *S, const hlsl::ShaderModel *SM, DXIL::ShaderKind EntrySK, DXIL::NodeLaunchType NodeLaunchTy, const FunctionDecl *EntryDecl, llvm::SmallPtrSetImpl &DiagnosedCalls, - llvm::SmallPtrSetImpl &DeclAvailabilityChecked) + llvm::SmallPtrSetImpl &DeclAvailabilityChecked, + llvm::SmallSet &DiagnosedTypeLocs) : sema(S), SM(SM), EntrySK(EntrySK), NodeLaunchTy(NodeLaunchTy), EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls), - DeclAvailabilityChecked(DeclAvailabilityChecked) {} + DeclAvailabilityChecked(DeclAvailabilityChecked), + DiagnosedTypeLocs(DiagnosedTypeLocs) {} bool VisitCallExpr(CallExpr *CE) { // Set flag if already diagnosed from another entry, allowing some @@ -401,16 +408,41 @@ class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor return true; } + bool VisitTypeLoc(TypeLoc TL) { + // Diagnose availability for used type. 
+ if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(TL)) { + UnqualTypeLoc UTL = TL.getUnqualifiedLoc(); + DiagnoseAvailability(AAttr, TL.getType(), UTL.getLocStart()); + } + + return true; + } + bool VisitDeclRefExpr(DeclRefExpr *DRE) { // Diagnose availability for referenced decl. if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(DRE)) { - NamedDecl *ND = DRE->getDecl(); - DiagnoseAvailability(AAttr, ND, DRE->getExprLoc()); + DiagnoseAvailability(AAttr, DRE->getDecl(), DRE->getExprLoc()); } return true; } + AvailabilityAttr *GetAvailabilityAttrOnce(TypeLoc TL) { + QualType Ty = TL.getType(); + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + if (!RD) + return nullptr; + AvailabilityAttr *AAttr = RD->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Type. + // Use the end location to avoid diagnosing the same type multiple times. + if (!DiagnosedTypeLocs.insert(TL.getEndLoc()).second) + return nullptr; + + return AAttr; + } + AvailabilityAttr *GetAvailabilityAttrOnce(DeclRefExpr *DRE) { AvailabilityAttr *AAttr = DRE->getDecl()->getAttr(); if (!AAttr) @@ -422,21 +454,36 @@ class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor return AAttr; } - void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + bool CheckSMVersion(VersionTuple AAttrVT) { + VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + return SMVT >= AAttrVT; + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, QualType Ty, SourceLocation Loc) { VersionTuple AAttrVT = AAttr->getIntroduced(); - VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + if (CheckSMVersion(AAttrVT)) + return; - // if the current shader model is lower than what - // is stated in the availability attribute, emit - // the availability warning. + sema->Diag(Loc, diag::warn_hlsl_builtin_type_unavailable) + << Ty << SM->GetName() << AAttrVT.getAsString(); + } - if (SMVT < AAttrVT) { - // TBD: Determine best way to distinguish between builtin constant decls - // and other decls. - sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) - << ND << SM->GetName() << AAttrVT.getAsString(); + void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + if (CheckSMVersion(AAttrVT)) + return; + + if (isa(ND)) { + sema->Diag(Loc, diag::warn_hlsl_intrinsic_in_wrong_shader_model) + << ND->getQualifiedNameAsString() << EntryDecl + << AAttrVT.getAsString(); + return; } + + sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) + << ND << SM->GetName() << AAttrVT.getAsString(); } clang::Sema *getSema() { return sema; } @@ -449,6 +496,7 @@ class HLSLCallDiagnoseVisitor // Could rename to HLSLReachableDiagnoseVisitor const FunctionDecl *EntryDecl; llvm::SmallPtrSetImpl &DiagnosedCalls; llvm::SmallPtrSetImpl &DeclAvailabilityChecked; + llvm::SmallSet &DiagnosedTypeLocs; }; std::optional @@ -550,6 +598,8 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; llvm::SmallPtrSet DeclAvailabilityChecked; + llvm::SmallSet DiagnosedTypeLocs; + // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; @@ -671,11 +721,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { NodeLaunchTy = DXIL::NodeLaunchType::Broadcasting; } } + // Visit all visited functions in call graph to collect illegal intrinsic // calls. 
- HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, - FDecl, DiagnosedCalls, - DeclAvailabilityChecked); + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, EntrySK, NodeLaunchTy, FDecl, DiagnosedCalls, + DeclAvailabilityChecked, DiagnosedTypeLocs); // Visit globals with initializers when processing entry point. for (VarDecl *VD : InitGlobals) Visitor.TraverseDecl(VD); diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 650fe38adc..636eaf0213 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -10936,7 +10936,13 @@ bool Sema::buildOverloadedCallSet(Scope *S, Expr *Fn, ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; - assert((!ULE->getQualifier() || isVkNamespace) && "non-vk qualified name with ADL"); + bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + + assert((!ULE->getQualifier() || isVkNamespace || isDxNamespace) && + "expected vk or dx qualified name with ADL"); // HLSL Change Ends // We don't perform ADL for implicit declarations of builtins. diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll new file mode 100644 index 0000000000..01dafe5e86 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll @@ -0,0 +1,101 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; CHECK-NOT: @dx.op.hitObject_ +; CHECK-NOT: @dx.op.maybeReorderThread + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !24 ; line:10 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !24 ; line:10 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !24 ; line:10 col:3 + %4 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !25 ; line:11 col:1 + call void @llvm.lifetime.end(i64 4, i8* %4) #0, !dbg !25 ; line:11 col:1 + ret void, !dbg !25 ; line:11 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare 
void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 (ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 1, scope: !20) diff --git a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll new file mode 100644 index 0000000000..f5130bca3f --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll @@ -0,0 +1,106 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; CHECK-NOT: @dx.op.hitObject_ +; CHECK-NOT: @dx.op.maybeReorderThread + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %hit), !dbg !24 ; line:10 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, 
%dx.types.HitObject* %hit, i32 241, i32 3), !dbg !25 ; line:11 col:3 + call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7), !dbg !26 ; line:12 col:3 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !27 ; line:13 col:1 + call void @llvm.lifetime.end(i64 4, i8* %2) #0, !dbg !27 ; line:13 col:1 + ret void, !dbg !27 ; line:13 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32, %dx.types.HitObject*, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 3, scope: !20) +!26 = !DILocation(line: 12, column: 3, scope: !20) +!27 = !DILocation(line: 13, column: 1, scope: !20) diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl new file mode 100644 index 0000000000..4e09b770ec --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// TODO: Implement lowering for dx::HitObject::MakeNop + +// CHECK-NOT: call + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl new file mode 100644 index 0000000000..fd2fbc5974 --- /dev/null +++ 
b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition +// CHECK-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final +// CHECK-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// CHECK-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit +// CHECK-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' +// CHECK-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' +// CHECK-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// CHECK-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit + +// CHECK: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop +// CHECK-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// CHECK-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static +// CHECK-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static +// CHECK-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// CHECK-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// CHECK-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl new file mode 100644 index 0000000000..8824cffaec --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl @@ -0,0 +1,13 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s + +// TODO: Implement lowering for dx::MaybeReorderThread + +// CHECK-NOT: call + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl new file mode 100644 index 0000000000..d570ef021f --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s + +// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// CHECK: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// CHECK-NEXT: |-HLSLIntrinsicAttr {{[^ 
]+}} <> Implicit "op" "" 359 +// CHECK-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl new file mode 100644 index 0000000000..fa3ab68506 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl @@ -0,0 +1,10 @@ +// RUN: %dxc -T lib_6_9 -E main %s -verify + +// expected-no-diagnostics + +[shader("raygeneration")] void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl new file mode 100644 index 0000000000..3c97ea0a77 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl @@ -0,0 +1,62 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,closesthit,anyhit,miss) + : read(caller,closesthit,anyhit,miss); +}; + +struct Attribs { float2 barys; }; +void CallReorder() +{ +// expected-error@+6{{dx::MaybeReorderThread is unavailable in shader stage 'compute' (requires 'raygeneration')}} +// expected-error@+5{{dx::MaybeReorderThread is unavailable in shader stage 'callable' (requires 'raygeneration')}} +// expected-error@+4{{dx::MaybeReorderThread is unavailable in shader stage 'intersection' (requires 'raygeneration')}} +// expected-error@+3{{dx::MaybeReorderThread is unavailable in shader stage 'anyhit' (requires 'raygeneration')}} +// expected-error@+2{{dx::MaybeReorderThread is unavailable in shader stage 'closesthit' (requires 'raygeneration')}} +// expected-error@+1{{dx::MaybeReorderThread is unavailable in shader stage 'miss' (requires 'raygeneration')}} + dx::MaybeReorderThread(0,0); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainReorderCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { + CallReorder(); +} + +[shader("raygeneration")] +void mainReorderRG() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainReorderCALL(inout Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainReorderIS() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainReorderAH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("closesthit")] +void mainReorderCH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("miss")] +void mainReorderMS(inout Payload pld) { + CallReorder(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..db2d0fd2e3 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl @@ -0,0 +1,9 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// Check that 
dx::MaybeReorderThread is unavailable pre SM 6.9. + +[shader("raygeneration")] +void main() { + // expected-error@+1{{intrinsic dx::MaybeReorderThread potentially used by ''main'' requires shader model 6.9 or greater}} + dx::MaybeReorderThread(15u, 4u); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl new file mode 100644 index 0000000000..e23f398538 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// expected-no-diagnostics +using namespace dx; + +[shader("raygeneration")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl new file mode 100644 index 0000000000..44afcf47e7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl @@ -0,0 +1,58 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +dx::HitObject UseHitObject() { + return dx::HitObject::MakeNop(); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainHitCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { +// expected-error@-7{{dx::HitObject is unavailable in shader stage 'compute' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainHitCALL(inout Attribs attrs) { +// expected-error@-14{{dx::HitObject is unavailable in shader stage 'callable' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainHitIS() { +// expected-error@-21{{dx::HitObject is unavailable in shader stage 'intersection' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainHitAH(inout Payload pld, in Attribs attrs) { +// expected-error@-28{{dx::HitObject is unavailable in shader stage 'anyhit' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +[shader("raygeneration")] +void mainHitRG() { + UseHitObject(); +} + +[shader("closesthit")] +void mainHitCH(inout Payload pld, in Attribs attrs) { + UseHitObject(); +} + +[shader("miss")] +void mainHitMS(inout Payload pld) { + UseHitObject(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl new file mode 100644 index 0000000000..baa3a07a5b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl @@ -0,0 +1,4 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// expected-error@+1{{'dx::HitObject' is an object and cannot be used as a type parameter}} +RWStructuredBuffer InvalidBuffer; diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..59c8dfbe2f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T
lib_6_8 %s -verify + +// Check that the HitObject is unavailable pre SM 6.9. + +[shader("raygeneration")] +void main() { + // expected-error@+3{{intrinsic dx::HitObject::MakeNop potentially used by ''main'' requires shader model 6.9 or greater}} + // expected-error@+2{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + // expected-error@+1{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + dx::HitObject hit = dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl new file mode 100644 index 0000000000..4b6c45806b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 %s -verify + +// expected-note@+1{{entry function defined here}} +float main(RayDesc rayDesc: RAYDESC) : OUT { +// expected-error@+1{{dx::HitObject is unavailable in shader stage 'vertex' (requires 'raygeneration', 'closesthit' or 'miss')}} + dx::HitObject::MakeNop(); + return 0.f; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl new file mode 100644 index 0000000000..c266d81ddb --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// This test checks that HitObject can be used with 'using namespace dx' instead of explicit namespace prefix +// expected-no-diagnostics + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +using namespace dx; + +[shader("raygeneration")] +void main() +{ + HitObject hit; + MaybeReorderThread(hit); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // Create a HitObject + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // Also test using a static method + HitObject hit = HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl new file mode 100644 index 0000000000..cb7a24e1c7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +// Also test API methods +[shader("raygeneration")] +void main2() +{ + // expected-error@+1{{use of undeclared identifier 'HitObject'}} + HitObject::MakeNop(); +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl 
b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl new file mode 100644 index 0000000000..edf7e4fa71 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(1); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(2); +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(3); +} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 51ea6b3176..0ca5b0716b 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1089,6 +1089,17 @@ uint [[ro]] CommittedInstanceContributionToHitGroupIndex(); } namespace +// Shader Execution Reordering +namespace DxHitObjectMethods { + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeNop(); +} namespace + +namespace DxIntrinsics { +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject); +void [[min_sm=6.9]] MaybeReorderThread(in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject, in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +} namespace + // Work Graphs objects and methods // EmptyNodeInput diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 6f4611db32..0a9ab062a3 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8209,6 +8209,8 @@ def __init__( overload_idx, hidden, min_shader_model, + static_member, + class_prefix, ): self.name = name # Function name self.idx = idx # Unique number within namespace @@ -8217,14 +8219,27 @@ def __init__( self.ns = ns # Function namespace self.ns_idx = ns_idx # Namespace index self.doc = doc # Documentation - id_prefix = "IOP" if ns == "Intrinsics" else "MOP" + id_prefix = "IOP" if ns.endswith("Intrinsics") else "MOP" + + class_name = None + if ns.endswith("Methods"): + class_name = ns[0 : -len("Methods")] + # SPIR-V Change Starts if ns == "VkIntrinsics": name = "Vk" + name self.name = "Vk" + self.name id_prefix = "IOP" # SPIR-V Change Ends - self.enum_name = "%s_%s" % (id_prefix, name) # enum name + if ns.startswith("Dx"): + if not class_prefix: + name = "Dx" + name + self.name = name + + if class_prefix: + self.enum_name = "%s_%s_%s" % (id_prefix, class_name, name) + else: + self.enum_name = "%s_%s" % (id_prefix, name) self.readonly = ro # Only read memory self.readnone = rn # Not read memory self.argmemonly = amo # Only accesses memory through argument pointers @@ -8242,6 +8257,7 @@ def __init__( self.min_shader_model = (min_shader_model[0] << 4) | ( min_shader_model[1] & 0x0F ) + self.static_member = static_member # HLSL static member function self.key = ( ("%3d" % ns_idx) + "!" 
@@ -8355,6 +8371,7 @@ def __init__(self, intrinsic_defs, opcode_data): "AnyNodeOutputRecord": "LICOMPTYPE_ANY_NODE_OUTPUT_RECORD", "GroupNodeOutputRecords": "LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS", "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", + "DxHitObject": "LICOMPTYPE_HIT_OBJECT", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} @@ -8414,7 +8431,7 @@ def load_intrinsics(self, intrinsic_defs): r"""( sampler\w* | string | (?:RW)?(?:Texture\w*|ByteAddressBuffer) | - acceleration_struct | ray_desc | + acceleration_struct | ray_desc | RayQuery | DxHitObject | Node\w* | RWNode\w* | EmptyNode\w* | AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* $)""", @@ -8620,7 +8637,9 @@ def process_attr(attr): readonly = False # Only read memory readnone = False # Not read memory argmemonly = False # Only reads memory through pointer arguments + static_member = False # Static member function is_wave = False + class_prefix = False # Insert class name as enum_prefix # Is wave-sensitive unsigned_op = "" # Unsigned opcode if exist overload_param_index = ( @@ -8646,6 +8665,12 @@ def process_attr(attr): if a == "hidden": hidden = True continue + if a == "static": + static_member = True + continue + if a == "class_prefix": + class_prefix = True + continue assign = a.split("=") @@ -8689,6 +8714,8 @@ def process_attr(attr): overload_param_index, hidden, min_shader_model, + static_member, + class_prefix, ) current_namespace = None @@ -8737,6 +8764,8 @@ def process_attr(attr): overload_param_index, hidden, min_shader_model, + static_member, + class_prefix, ) = process_attr(attr) # Add an entry for this intrinsic. if bracket_cleanup_re.search(opts): @@ -8753,6 +8782,8 @@ def process_attr(attr): for in_arg in in_args: args.append(process_arg(in_arg, arg_idx, args, name)) arg_idx += 1 + if class_prefix: + assert current_namespace.endswith("Methods") # We have to process the return type description last # to match the compiler's handling of it and allow # the return type to match an input type. 
@@ -8776,6 +8807,8 @@ def process_attr(attr): overload_param_index, hidden, min_shader_model, + static_member, + class_prefix, ) ) num_entries += 1 diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 2a0359d274..4580e6c12c 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -620,6 +620,7 @@ def print_opfunc_table(self): "noderecordhandle": "A(pNodeRecordHandle);", "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", + "hit_object": "A(pHit);", } last_category = None for i in self.instrs: @@ -985,15 +986,11 @@ def get_hlsl_intrinsics(): last_ns = "" ns_table = "" is_vk_table = False # SPIRV Change - id_prefix = "" arg_idx = 0 opcode_namespace = db.opcode_namespace for i in sorted(db.intrinsics, key=lambda x: x.key): if last_ns != i.ns: last_ns = i.ns - id_prefix = ( - "IOP" if last_ns == "Intrinsics" or last_ns == "VkIntrinsics" else "MOP" - ) # SPIRV Change if len(ns_table): result += ns_table + "};\n" # SPIRV Change Starts @@ -1017,14 +1014,15 @@ def get_hlsl_intrinsics(): flags.append("INTRIN_FLAG_READ_NONE") if i.wave: flags.append("INTRIN_FLAG_IS_WAVE") + if i.static_member: + flags.append("INTRIN_FLAG_STATIC_MEMBER") if flags: flags = " | ".join(flags) else: flags = "0" - ns_table += " {(UINT)%s::%s_%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( + ns_table += " {(UINT)%s::%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( opcode_namespace, - id_prefix, - i.name, + i.enum_name, flags, i.min_shader_model, i.overload_param_index, diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index 48a0b74c17..4c85069488 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 358, + "Num_Intrinsics": 360, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -358,6 +358,8 @@ "IOP_umul": 354, "IOP_usign": 355, "MOP_InterlockedUMax": 356, - "MOP_InterlockedUMin": 357 + "MOP_InterlockedUMin": 357, + "MOP_DxHitObject_MakeNop": 358, + "IOP_DxMaybeReorderThread": 359 } } From 7269298ed01919ad7cb0592f51cdf896a5e3ee4a Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 25 Mar 2025 17:28:21 +0100 Subject: [PATCH 47/88] [SER] HitObject_MakeNop|Miss DXIL opcodes and verification tests (#7201) - DXIL opcodes for HitObject_MakeNop and HitObject_MakeMiss - DXV validation test --- include/dxc/DXIL/DxilConstants.h | 12 ++- include/dxc/DXIL/DxilInstructions.h | 74 +++++++++++++++++++ include/dxc/DXIL/DxilOperations.h | 2 + lib/DXIL/DxilOperations.cpp | 64 ++++++++++++---- .../ser_hitobject_make_passing.ll | 46 ++++++++++++ utils/hct/hctdb.py | 53 ++++++++++++- 6 files changed, 229 insertions(+), 22 deletions(-) create mode 100644 tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 54131f3948..0a9c6a4ffd 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -520,9 +520,7 @@ enum class OpCode : unsigned { ReservedB27 = 289, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved - ReservedB3 = 265, // reserved ReservedB30 = 292, // reserved - ReservedB4 = 266, // reserved ReservedB5 = 267, // reserved ReservedB6 = 268, // reserved ReservedB7 = 269, // reserved @@ -909,6 +907,10 @@ enum class OpCode : unsigned { WriteSamplerFeedbackLevel = 176, // updates a feedback texture for a sampling // operation with a mipmap-level offset + // 
Shader Execution Reordering + HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss + HitObject_MakeNop = 266, // Creates an empty nop HitObject + // Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands AtomicCompareExchange = 79, // atomic compare and exchange to memory @@ -1281,6 +1283,10 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackGrad, WriteSamplerFeedbackLevel, + // Shader Execution Reordering + HitObject_MakeMiss, + HitObject_MakeNop, + // Synchronization AtomicBinOp, AtomicCompareExchange, @@ -1345,7 +1351,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 175 // exclusive last value of enumeration + NumOpClasses = 177 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 11ab8e3b8d..6a28a2a806 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8813,5 +8813,79 @@ struct DxilInst_AllocateRayQuery2 { llvm::APInt(32, (uint64_t)val))); } }; + +/// This instruction Creates a new HitObject representing a miss +struct DxilInst_HitObject_MakeMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_MakeMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (11 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_RayFlags = 1, + arg_MissShaderIndex = 2, + arg_Origin_X = 3, + arg_Origin_Y = 4, + arg_Origin_Z = 5, + arg_TMin = 6, + arg_Direction_X = 7, + arg_Direction_Y = 8, + arg_Direction_Z = 9, + arg_TMax = 10, + }; + // Accessors + llvm::Value *get_RayFlags() const { return Instr->getOperand(1); } + void set_RayFlags(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_MissShaderIndex() const { return Instr->getOperand(2); } + void set_MissShaderIndex(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(3); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(4); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(5); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(6); } + void set_TMin(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(7); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(8); } + void set_Direction_Y(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(9); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(10); } + void set_TMax(llvm::Value *val) { Instr->setOperand(10, val); } +}; + +/// This instruction Creates an empty nop HitObject +struct DxilInst_HitObject_MakeNop { + llvm::Instruction *Instr; + 
// Construction and identification + DxilInst_HitObject_MakeNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (1 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 3514701327..e522e06204 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -64,6 +64,7 @@ class OP { void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } llvm::Type *GetHandleType() const; + llvm::Type *GetHitObjectType() const; llvm::Type *GetNodeHandleType() const; llvm::Type *GetNodeRecordHandleType() const; llvm::Type *GetResourcePropertiesType() const; @@ -146,6 +147,7 @@ class OP { llvm::Module *m_pModule; llvm::Type *m_pHandleType; + llvm::Type *m_pHitObjectType; llvm::Type *m_pNodeHandleType; llvm::Type *m_pNodeRecordHandleType; llvm::Type *m_pResourcePropertiesType; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b3e552da18..86049fee9c 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2670,24 +2670,29 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { false}, Attribute::None, }, + + // Shader Execution Reordering void, h, f, d, i1, i8, i16, + // i32, i64, udt, obj , function attribute { - OC::ReservedB3, - "ReservedB3", - OCC::Reserved, - "reserved", + OC::HitObject_MakeMiss, + "HitObject_MakeMiss", + OCC::HitObject_MakeMiss, + "hitObject_MakeMiss", {true, false, false, false, false, false, false, false, false, false, false}, - Attribute::None, + Attribute::ReadNone, }, { - OC::ReservedB4, - "ReservedB4", - OCC::Reserved, - "reserved", + OC::HitObject_MakeNop, + "HitObject_MakeNop", + OCC::HitObject_MakeNop, + "hitObject_MakeNop", {true, false, false, false, false, false, false, false, false, false, false}, - Attribute::None, + Attribute::ReadNone, }, + + // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute { OC::ReservedB5, "ReservedB5", @@ -3750,6 +3755,14 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } + // Instructions: HitObject_MakeMiss=265, HitObject_MakeNop=266 + if ((265 <= op && op <= 266)) { + major = 6; + minor = 9; + mask = + SFLAG(Library) | SFLAG(RayGeneration) | SFLAG(ClosestHit) | SFLAG(Miss); + return; + } // OPCODE-SMMASK:END } @@ -3851,6 +3864,8 @@ OP::OP(LLVMContext &Ctx, Module *pModule) m_pHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.Handle", pModule); + m_pHitObjectType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), + "dx.types.HitObject", pModule); m_pNodeHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.NodeHandle", pModule); m_pNodeRecordHandleType = GetOrCreateStructType( @@ -3993,6 +4008,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { Type *pF64 = Type::getDoubleTy(m_Ctx); Type *pSDT = GetSplitDoubleType(); // Split double type. Type *p4I32 = GetFourI32Type(); // 4 i32s in a struct. 
+ Type *pHit = GetHitObjectType(); Type *udt = pOverloadType; Type *obj = pOverloadType; @@ -5871,14 +5887,28 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB3: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_MakeMiss: + A(pHit); + A(pI32); + A(pI32); A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); break; - case OpCode::ReservedB4: - A(pV); + case OpCode::HitObject_MakeNop: + A(pHit); A(pI32); break; + + // case OpCode::ReservedB5: A(pV); A(pI32); @@ -6288,8 +6318,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ReservedB0: case OpCode::ReservedB1: case OpCode::ReservedB2: - case OpCode::ReservedB3: - case OpCode::ReservedB4: + case OpCode::HitObject_MakeMiss: + case OpCode::HitObject_MakeNop: case OpCode::ReservedB5: case OpCode::ReservedB6: case OpCode::ReservedB7: @@ -6431,6 +6461,8 @@ Type *OP::GetHandleType() const { return m_pHandleType; } Type *OP::GetNodeHandleType() const { return m_pNodeHandleType; } +Type *OP::GetHitObjectType() const { return m_pHitObjectType; } + Type *OP::GetNodeRecordHandleType() const { return m_pNodeRecordHandleType; } Type *OP::GetResourcePropertiesType() const { diff --git a/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll new file mode 100644 index 0000000000..88b71ff3e0 --- /dev/null +++ b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll @@ -0,0 +1,46 @@ +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + ; Test HitObject_MakeMiss (opcode 265) + %r265 = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + + ; Test HitObject_MakeNop (opcode 266) + %r266 = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32, i32, i32, float, float, float, float, float, float, float, float) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!9 = !{null, !"", null, null, !10} +!10 = !{i32 0, i64 0} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 0a9ab062a3..fc4c427580 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -699,6 +699,15 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Extended Command Information" self.name_idx[i].shader_stages = ("vertex",) 
self.name_idx[i].shader_model = 6, 8 + for i in ("HitObject_MakeMiss,HitObject_MakeNop").split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + "closesthit", + "miss", + ) def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. @@ -5550,7 +5559,43 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 31) + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3) + + self.add_dxil_op( + "HitObject_MakeMiss", + next_op_idx, + "HitObject_MakeMiss", + "Creates a new HitObject representing a miss", + "v", + "rn", + [ + db_dxil_param(0, "hit_object", "", "HitObject with a committed miss"), + db_dxil_param(2, "i32", "RayFlags", "ray flags"), + db_dxil_param(3, "i32", "MissShaderIndex", "Miss shader index"), + db_dxil_param(4, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(5, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(6, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(7, "f", "TMin", "Tmin of the ray"), + db_dxil_param(8, "f", "Direction_X", "Direction x of the ray"), + db_dxil_param(9, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(10, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(11, "f", "TMax", "Tmax of the ray"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_MakeNop", + next_op_idx, + "HitObject_MakeNop", + "Creates an empty nop HitObject", + "v", + "rn", + [db_dxil_param(0, "hit_object", "", "Empty nop HitObject")], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 26, 5) # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) @@ -8145,10 +8190,12 @@ def add_dxil_op_reserved(self, name, code_id): ) self.instr.append(i) - def reserve_dxil_op_range(self, group_name, start_id, count): + def reserve_dxil_op_range(self, group_name, start_id, count, start_reserved_id=0): "Reserve a range of dxil opcodes for future use; returns next id" for i in range(0, count): - self.add_dxil_op_reserved("{0}{1}".format(group_name, i), start_id + i) + self.add_dxil_op_reserved( + "{0}{1}".format(group_name, start_reserved_id + i), start_id + i + ) return start_id + count def get_instr_by_llvm_name(self, llvm_name): From 33bc44a3d370a1f3f835079dc5049e9989b79d89 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 25 Mar 2025 10:20:31 -0700 Subject: [PATCH 48/88] Update github actions versions to enable coverage (#7183) Coverage generation has failed because of the deprecation of versions of upload-artifact before v4 which the version of upload-pages-artifact that DXC used made use of. This bumps that and all other actions to the latest versions. 
--- .github/workflows/coverage-gh-pages.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/coverage-gh-pages.yml b/.github/workflows/coverage-gh-pages.yml index 4c7b2c2018..07e63584e3 100644 --- a/.github/workflows/coverage-gh-pages.yml +++ b/.github/workflows/coverage-gh-pages.yml @@ -26,11 +26,11 @@ jobs: timeout-minutes: 240 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: true - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@v5 - name: Install dependencies run: sudo apt install -y ninja-build - name: Configure @@ -44,7 +44,7 @@ - name: Force artifact permissions run: chmod -c -R +rX ${{github.workspace}}/build/report - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v3 with: path: ${{github.workspace}}/build/report @@ -60,4 +60,4 @@ steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 From 1eb83c777f8efc8e761f6cd83e52f6cb879deaac Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 26 Mar 2025 04:19:06 -0700 Subject: [PATCH 49/88] Allow native vectors for LLVM operations (#7155) Disables various forms of scalarization and vector elimination to permit vectors to pass through to final DXIL when used in native LLVM operations and loading/storing. Introduces a few vector manipulation LLVM instructions to DXIL allowing for them to appear in output DXIL. Skips passes for 6.9 that scalarize, convert to arrays, or otherwise eliminate vectors. This eliminates the element-by-element extraction, application, and reconstitution of the vectors to operators. In many cases, this required plumbing the shader model information to passes that didn't have it before and also the recreation of dxil version information from metadata where necessary. Many changes were needed for the MatrixBitcastLower pass related to linking to avoid converting matrix vectors, but also to perform the conversion if a shader was compiled for 6.9+, but then linked to an earlier target. This now adapts to the linker target to either preserve vectors for 6.9 or arrays for previous versions. This requires running the DynamicIndexing VectorToArray pass during linking since 6_x and 6_9+ will fail to run this in the initial compile, but will still need to lower vectors to arrays. This required making the pass particularly robust to different sources of version information as compiling, linking, and running optimization in isolation each require retrieval from a different source. The latter two sources are facilitated with a dxilutil function. Ternary conditional/select operators were element extracted in codegen. Removing this allows 6.9 to preserve the vectors, but also maintains behavior for previous shader models because the operations get scalarized later anyway. This was in the region of work to allow short circuiting, but the effect of that is to introduce the select and skip the later code that implements short circuiting for supported cases. Tests confirm that no short circuiting is introduced for native vectors. Adds extensive tests for these operations using different types and sizes and testing them appropriately. Booleans produce significantly different code, so they get their own test. Vec1s have some special treatment as they are not allowed in final dxil, so they still need to be scalarized. This requires value specific conditionals in transformation passes.
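As a rough illustration of that distinction (a hypothetical sketch, not part of this change; the function names and the width of 8 are arbitrary), consider two exported functions compiled with -T lib_6_9: the long-vector addition is expected to survive as a single native vector add in the final DXIL, while the one-element vector is still scalarized because final DXIL does not allow vec1s.

    // Hypothetical example, not taken from this change.
    export vector<float, 8> add8(vector<float, 8> a, vector<float, 8> b) {
      return a + b; // expected to remain one 8-wide vector add under SM 6.9
    }
    export vector<float, 1> add1(vector<float, 1> a, vector<float, 1> b) {
      return a + b; // vec1 is still lowered to a scalar add
    }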
Testing confirms that this is done. Fixes #7123 --- include/dxc/DXIL/DxilInstructions.h | 36 + include/dxc/DXIL/DxilMetadataHelper.h | 2 + include/dxc/DXIL/DxilUtil.h | 4 + lib/DXIL/DxilMetadataHelper.cpp | 23 +- lib/DXIL/DxilUtil.cpp | 13 + lib/DxilValidation/DxilValidation.cpp | 23 +- lib/HLSL/DxilLinker.cpp | 6 + lib/HLSL/HLMatrixBitcastLowerPass.cpp | 60 +- lib/HLSL/HLModule.cpp | 3 + lib/Transforms/Scalar/LowerTypePasses.cpp | 40 +- .../Scalar/ScalarReplAggregatesHLSL.cpp | 56 +- lib/Transforms/Scalar/Scalarizer.cpp | 43 +- tools/clang/lib/CodeGen/CGExprScalar.cpp | 15 +- tools/clang/lib/Sema/SemaHLSL.cpp | 8 +- .../hlsl/types/longvec-operators-bool.hlsl | 464 +++++++++++ .../hlsl/types/longvec-operators-int.hlsl | 73 ++ .../hlsl/types/longvec-operators-scalars.hlsl | 342 ++++++++ .../types/longvec-operators-shortcircuit.hlsl | 57 ++ .../hlsl/types/longvec-operators-vec1s.hlsl | 479 +++++++++++ .../hlsl/types/longvec-operators.hlsl | 581 ++++++++++++++ .../passes/longvec-alloca-gv-dynvec2array.ll | 269 +++++++ .../passes/longvec-alloca-gv-sroa.ll | 324 ++++++++ .../CodeGenDXIL/passes/longvec-alloca-gv.hlsl | 112 +++ .../passes/longvec-operators-scalarizer.ll | 660 ++++++++++++++++ .../longvec-operators-vec1-scalarizer.ll | 745 ++++++++++++++++++ .../passes/longvec-operators-vec1.hlsl | 425 ++++++++++ .../CodeGenDXIL/passes/longvec-operators.hlsl | 420 ++++++++++ .../passes/dxil/lower_type/vec_array_param.ll | 1 - tools/clang/unittests/HLSL/LinkerTest.cpp | 5 + utils/hct/hctdb.py | 31 + 30 files changed, 5242 insertions(+), 78 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 6a28a2a806..f8d9ae77f3 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -645,6 +645,42 @@ struct LlvmInst_VAArg { bool isAllowed() const { return false; } }; +/// This instruction extracts from vector +struct LlvmInst_ExtractElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ExtractElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::ExtractElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction inserts into vector +struct LlvmInst_InsertElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_InsertElement(llvm::Instruction 
*pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::InsertElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction Shuffle two vectors +struct LlvmInst_ShuffleVector { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ShuffleVector(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::ShuffleVector; + } + // Validation support + bool isAllowed() const { return true; } +}; + /// This instruction extracts from aggregate struct LlvmInst_ExtractValue { llvm::Instruction *Instr; diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h index fa13f6d766..9df155e6e7 100644 --- a/include/dxc/DXIL/DxilMetadataHelper.h +++ b/include/dxc/DXIL/DxilMetadataHelper.h @@ -427,6 +427,8 @@ class DxilMDHelper { // Dxil version. void EmitDxilVersion(unsigned Major, unsigned Minor); void LoadDxilVersion(unsigned &Major, unsigned &Minor); + static bool LoadDxilVersion(const llvm::Module *pModule, unsigned &Major, + unsigned &Minor); // Validator version. void EmitValidatorVersion(unsigned Major, unsigned Minor); diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index 5652c56f50..ca8f2ac755 100644 --- a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -223,6 +223,10 @@ bool DeleteDeadAllocas(llvm::Function &F); llvm::Value *GEPIdxToOffset(llvm::GetElementPtrInst *GEP, llvm::IRBuilder<> &Builder, hlsl::OP *OP, const llvm::DataLayout &DL); + +// Passes back Dxil version of the given module on true return. +bool LoadDxilVersion(const llvm::Module *M, unsigned &Major, unsigned &Minor); + } // namespace dxilutil } // namespace hlsl diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp index fdd6d6b946..19d199ee29 100644 --- a/lib/DXIL/DxilMetadataHelper.cpp +++ b/lib/DXIL/DxilMetadataHelper.cpp @@ -177,17 +177,28 @@ void DxilMDHelper::EmitDxilVersion(unsigned Major, unsigned Minor) { pDxilVersionMD->addOperand(MDNode::get(m_Ctx, MDVals)); } -void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { - NamedMDNode *pDxilVersionMD = m_pModule->getNamedMetadata(kDxilVersionMDName); - IFTBOOL(pDxilVersionMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA); - IFTBOOL(pDxilVersionMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA); +// Load dxil version from metadata contained in pModule. +// Returns true and passes result through +// the dxil major/minor version params if valid. +// Returns false if metadata is missing or invalid. 
+bool DxilMDHelper::LoadDxilVersion(const Module *pModule, unsigned &Major, + unsigned &Minor) { + NamedMDNode *pDxilVersionMD = pModule->getNamedMetadata(kDxilVersionMDName); + IFRBOOL(pDxilVersionMD != nullptr, false); + IFRBOOL(pDxilVersionMD->getNumOperands() == 1, false); MDNode *pVersionMD = pDxilVersionMD->getOperand(0); - IFTBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, - DXC_E_INCORRECT_DXIL_METADATA); + IFRBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, false); Major = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMajorIdx)); Minor = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMinorIdx)); + + return true; +} + +void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { + IFTBOOL(LoadDxilVersion(m_pModule, Major, Minor), + DXC_E_INCORRECT_DXIL_METADATA); } // diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 0a4fb1160a..966c2e189c 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -1415,5 +1415,18 @@ bool DeleteDeadAllocas(llvm::Function &F) { return Changed; } +// Retrieve dxil version in the given module. +// Where the module doesn't already have a Dxil module, +// it identifies and returns the version info from the metatdata. +// Returns false where none of that works, but that shouldn't happen much. +bool LoadDxilVersion(const Module *M, unsigned &Major, unsigned &Minor) { + if (M->HasDxilModule()) { + M->GetDxilModule().GetShaderModel()->GetDxilVersion(Major, Minor); + return true; + } + // No module, try metadata. + return DxilMDHelper::LoadDxilVersion(M, Major, Minor); +} + } // namespace dxilutil } // namespace hlsl diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 0a2001a745..4622256dfe 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2193,6 +2193,9 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, return true; if (Ty->isVectorTy()) { + if (Ty->getVectorNumElements() > 1 && + ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoVector); return false; } @@ -2669,6 +2672,23 @@ static bool IsLLVMInstructionAllowedForLib(Instruction &I, } } +// Shader model specific checks for valid LLVM instructions. +// Currently only checks for pre 6.9 usage of vector operations. +// Returns false if shader model is pre 6.9 and I represents a vector +// operation. Returns true otherwise. +static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, + ValidationContext &ValCtx) { + if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; + unsigned OpCode = I.getOpcode(); + if (OpCode == Instruction::InsertElement || + OpCode == Instruction::ExtractElement || + OpCode == Instruction::ShuffleVector) + return false; + + return true; +} + static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { bool SupportsMinPrecision = ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; @@ -2691,7 +2711,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } // Instructions must be allowed. 
- if (!IsLLVMInstructionAllowed(I)) { + if (!IsLLVMInstructionAllowed(I) || + !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) { ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed); continue; diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index 68c83fc037..ca343662ab 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1255,6 +1255,12 @@ void DxilLinkJob::RunPreparePass(Module &M) { // For static global handle. PM.add(createLowerStaticGlobalIntoAlloca()); + // Change dynamic indexing vector to array where vectors aren't + // supported, but might be there from the initial compile. + if (!pSM->IsSM69Plus()) + PM.add( + createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */)); + // Remove MultiDimArray from function call arg. PM.add(createMultiDimArrayToOneDimArrayPass()); diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp index 93ba3b9816..db20d8a324 100644 --- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp +++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp @@ -76,6 +76,7 @@ Type *TryLowerMatTy(Type *Ty) { } class MatrixBitcastLowerPass : public FunctionPass { + bool SupportsVectors = false; public: static char ID; // Pass identification, replacement for typeid @@ -83,6 +84,9 @@ class MatrixBitcastLowerPass : public FunctionPass { StringRef getPassName() const override { return "Matrix Bitcast lower"; } bool runOnFunction(Function &F) override { + DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); + SupportsVectors = DM.GetShaderModel()->IsSM69Plus(); + bool bUpdated = false; std::unordered_set matCastSet; for (auto blkIt = F.begin(); blkIt != F.end(); ++blkIt) { @@ -100,7 +104,6 @@ class MatrixBitcastLowerPass : public FunctionPass { } } - DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); // Remove bitcast which has CallInst user. 
if (DM.GetShaderModel()->IsLib()) { for (auto it = matCastSet.begin(); it != matCastSet.end();) { @@ -185,7 +188,7 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { User *U = *(it++); if (GetElementPtrInst *GEP = dyn_cast(U)) { Type *EltTy = GEP->getType()->getPointerElementType(); - if (HLMatrixType::isa(EltTy)) { + if (HLMatrixType MatTy = HLMatrixType::dyn_cast(EltTy)) { // Change gep matrixArray, 0, index // into // gep oneDimArray, 0, index * matSize @@ -193,10 +196,11 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { SmallVector idxList(GEP->idx_begin(), GEP->idx_end()); DXASSERT(idxList.size() == 2, "else not one dim matrix array index to matrix"); - - HLMatrixType MatTy = HLMatrixType::cast(EltTy); - Value *matSize = Builder.getInt32(MatTy.getNumElements()); - idxList.back() = Builder.CreateMul(idxList.back(), matSize); + unsigned NumElts = MatTy.getNumElements(); + if (!SupportsVectors || NumElts == 1) { + Value *MatSize = Builder.getInt32(NumElts); + idxList.back() = Builder.CreateMul(idxList.back(), MatSize); + } Value *NewGEP = Builder.CreateGEP(A, idxList); lowerMatrix(GEP, NewGEP); DXASSERT(GEP->user_empty(), "else lower matrix fail"); @@ -211,13 +215,23 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { } else if (LoadInst *LI = dyn_cast(U)) { if (VectorType *Ty = dyn_cast(LI->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - Value *NewVec = UndefValue::get(LI->getType()); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateLoad(GEP); - NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + Value *NewVec = nullptr; + unsigned VecSize = Ty->getVectorNumElements(); + if (SupportsVectors && VecSize > 1) { + // Create a replacement load using the vector pointer. + Instruction *NewLd = LI->clone(); + unsigned VecIdx = NewLd->getNumOperands() - 1; + NewLd->setOperand(VecIdx, A); + Builder.Insert(NewLd); + NewVec = NewLd; + } else { + Value *zeroIdx = Builder.getInt32(0); + NewVec = UndefValue::get(LI->getType()); + for (unsigned i = 0; i < VecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateLoad(GEP); + NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + } } LI->replaceAllUsesWith(NewVec); LI->eraseFromParent(); @@ -228,12 +242,20 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { Value *V = ST->getValueOperand(); if (VectorType *Ty = dyn_cast(V->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateExtractElement(V, i); - Builder.CreateStore(Elt, GEP); + if (SupportsVectors && Ty->getVectorNumElements() > 1) { + // Create a replacement store using the vector pointer. 
+ Instruction *NewSt = ST->clone(); + unsigned VecIdx = NewSt->getNumOperands() - 1; + NewSt->setOperand(VecIdx, A); + Builder.Insert(NewSt); + } else { + Value *zeroIdx = Builder.getInt32(0); + unsigned vecSize = Ty->getNumElements(); + for (unsigned i = 0; i < vecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateExtractElement(V, i); + Builder.CreateStore(Elt, GEP); + } } ST->eraseFromParent(); } else { diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index 037885c9d8..a67877ef3e 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -604,6 +604,9 @@ MDTuple *HLModule::EmitHLResources() { void HLModule::LoadHLResources(const llvm::MDOperand &MDO) { const llvm::MDTuple *pSRVs, *pUAVs, *pCBuffers, *pSamplers; + // No resources. Nothing to do. + if (MDO.get() == nullptr) + return; m_pMDHelper->GetDxilResources(MDO, pSRVs, pUAVs, pCBuffers, pSamplers); // Load SRV records. diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp index feeb23a5da..d2438c7e22 100644 --- a/lib/Transforms/Scalar/LowerTypePasses.cpp +++ b/lib/Transforms/Scalar/LowerTypePasses.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLModule.h" @@ -180,10 +181,12 @@ bool LowerTypePass::runOnModule(Module &M) { namespace { class DynamicIndexingVectorToArray : public LowerTypePass { bool ReplaceAllVectors; + bool SupportsVectors; public: explicit DynamicIndexingVectorToArray(bool ReplaceAll = false) - : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {} + : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll), + SupportsVectors(false) {} static char ID; // Pass identification, replacement for typeid void applyOptions(PassOptions O) override; void dumpConfig(raw_ostream &OS) override; @@ -194,6 +197,7 @@ class DynamicIndexingVectorToArray : public LowerTypePass { Type *lowerType(Type *Ty) override; Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override; StringRef getGlobalPrefix() override { return ".v"; } + void initialize(Module &M) override; private: bool HasVectorDynamicIndexing(Value *V); @@ -207,6 +211,18 @@ class DynamicIndexingVectorToArray : public LowerTypePass { void ReplaceAddrSpaceCast(ConstantExpr *CE, Value *A, IRBuilder<> &Builder); }; +void DynamicIndexingVectorToArray::initialize(Module &M) { + // Set vector support according to available Dxil version. + // Use HLModule or metadata for version info. + // Otherwise retrieve from dxil module or metadata. + unsigned Major = 0, Minor = 0; + if (M.HasHLModule()) + M.GetHLModule().GetShaderModel()->GetDxilVersion(Major, Minor); + else + dxilutil::LoadDxilVersion(&M, Major, Minor); + SupportsVectors = (Major == 1 && Minor >= 9); +} + void DynamicIndexingVectorToArray::applyOptions(PassOptions O) { GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors, ReplaceAllVectors); @@ -306,9 +322,21 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) { } bool DynamicIndexingVectorToArray::needToLower(Value *V) { + bool MustReplaceVector = ReplaceAllVectors; Type *Ty = V->getType()->getPointerElementType(); - if (dyn_cast(Ty)) { - if (isa(V) || ReplaceAllVectors) { + + if (ArrayType *AT = dyn_cast(Ty)) { + // Array must be replaced even without dynamic indexing to remove vector + // type in dxil. 
+ MustReplaceVector = true; + Ty = dxilutil::GetArrayEltTy(AT); + } + + if (isa(Ty)) { + // Only needed for 2+ vectors where native vectors unsupported. + if (SupportsVectors && Ty->getVectorNumElements() > 1) + return false; + if (isa(V) || MustReplaceVector) { return true; } // Don't lower local vector which only static indexing. @@ -319,12 +347,6 @@ bool DynamicIndexingVectorToArray::needToLower(Value *V) { ReplaceStaticIndexingOnVector(V); return false; } - } else if (ArrayType *AT = dyn_cast(Ty)) { - // Array must be replaced even without dynamic indexing to remove vector - // type in dxil. - // TODO: optimize static array index in later pass. - Type *EltTy = dxilutil::GetArrayEltTy(AT); - return isa(EltTy); } return false; } diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 0c3e13f608..ec17fce9c8 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -81,16 +81,18 @@ class SROA_Helper { static bool DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT); - static bool - DoScalarReplacement(GlobalVariable *GV, std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, bool hasPrecise, - DxilTypeSystem &typeSys, const DataLayout &DL, - SmallVector &DeadInsts, DominatorTree *DT); + static bool DoScalarReplacement(GlobalVariable *GV, + std::vector &Elts, + IRBuilder<> &Builder, bool bFlatVector, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, + DominatorTree *DT); static unsigned GetEltAlign(unsigned ValueAlign, const DataLayout &DL, Type *EltTy, unsigned Offset); // Lower memcpy related to V. @@ -1714,6 +1716,7 @@ bool isGroupShareOrConstStaticArray(GlobalVariable *GV) { bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { Module &M = *HLM.GetModule(); + bool SupportsVectors = HLM.GetShaderModel()->IsSM69Plus(); DxilTypeSystem &typeSys = HLM.GetTypeSystem(); const DataLayout &DL = M.getDataLayout(); @@ -1878,7 +1881,8 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { uint64_t NumInstances = 1; bool SROAed = SROA_Helper::DoScalarReplacement( AI, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts, &DT); + /*bFlatVector*/ true, SupportsVectors, hasPrecise, typeSys, DL, + DeadInsts, &DT); if (SROAed) { Type *Ty = AI->getAllocatedType(); @@ -1945,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { continue; } - // Flat Global vector if no dynamic vector indexing. + // Flatten global vector if it has no dynamic vector indexing. bool bFlatVector = !hasDynamicVectorIndexing(GV); if (bFlatVector) { @@ -1981,7 +1985,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { // SROA_Parameter_HLSL has no access to a domtree, if one is needed, // it'll be generated SROAed = SROA_Helper::DoScalarReplacement( - GV, Elts, Builder, bFlatVector, + GV, Elts, Builder, bFlatVector, SupportsVectors, // TODO: set precise. 
/*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr); } @@ -2920,7 +2924,8 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy, bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT) { @@ -3033,6 +3038,10 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. + if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -3114,13 +3123,11 @@ unsigned SROA_Helper::GetEltAlign(unsigned ValueAlign, const DataLayout &DL, /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new /// AllocaInsts into Elts. Then do SROA on V. -bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, - std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, - SmallVector &DeadInsts, - DominatorTree *DT) { +bool SROA_Helper::DoScalarReplacement( + GlobalVariable *GV, std::vector &Elts, IRBuilder<> &Builder, + bool bFlatVector, bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, DominatorTree *DT) { DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n'); Type *Ty = GV->getType(); // Skip none pointer types. @@ -3134,6 +3141,9 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, // Skip basic types. if (Ty->isSingleValueType() && !Ty->isVectorTy()) return false; + // Skip vector where supported if it has more than 1 element. + if (Ty->isVectorTy() && SupportsVectors && Ty->getVectorNumElements() > 1) + return false; // Skip matrix types. if (HLMatrixType::isa(Ty)) return false; @@ -3240,6 +3250,10 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. 
+ if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -5277,6 +5291,8 @@ void SROA_Parameter_HLSL::flattenArgument( std::vector &FlatAnnotationList, BasicBlock *EntryBlock, ArrayRef DDIs) { std::deque WorkList; + bool SupportsVectors = m_pHLModule->GetShaderModel()->IsSM69Plus(); + WorkList.push_back({Arg, paramAnnotation}); unsigned startArgIndex = FlatAnnotationList.size(); @@ -5351,8 +5367,8 @@ void SROA_Parameter_HLSL::flattenArgument( // DomTree isn't used by arguments SROAed = SROA_Helper::DoScalarReplacement( V, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ false, annotation.IsPrecise(), dxilTypeSys, DL, - DeadInsts, /*DT*/ nullptr); + /*bFlatVector*/ false, SupportsVectors, annotation.IsPrecise(), + dxilTypeSys, DL, DeadInsts, /*DT*/ nullptr); } if (SROAed) { diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 729771c7c7..730354af99 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,6 +14,9 @@ // //===----------------------------------------------------------------------===// +#include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilUtil.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -151,6 +154,7 @@ class Scalarizer : public FunctionPass, // HLSL Change Begin bool AllowFolding = false; + bool SupportsVectors = false; Scalarizer(bool AllowFolding) : FunctionPass(ID), AllowFolding(AllowFolding) { @@ -290,6 +294,13 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { + // HLSL Change start - set SupportsVectors + const Module *M = F.getParent(); + unsigned Major = 0, Minor = 0; + if (hlsl::dxilutil::LoadDxilVersion(M, Major, Minor)) + SupportsVectors = (Major == 1 && Minor >= 9); + // HLSL Change end - set SupportsVectors + for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { @@ -436,7 +447,8 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, template bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { VectorType *VT = dyn_cast(I.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -457,7 +469,8 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { bool Scalarizer::visitSelectInst(SelectInst &SI) { VectorType *VT = dyn_cast(SI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -500,7 +513,8 @@ bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { VectorType *VT = dyn_cast(GEPI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; IRBuilder<> Builder(GEPI.getParent(), &GEPI); @@ -534,7 +548,8 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { bool Scalarizer::visitCastInst(CastInst &CI) { VectorType *VT = dyn_cast(CI.getDestTy()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. 
+ if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -559,6 +574,12 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { unsigned DstNumElems = DstVT->getNumElements(); unsigned SrcNumElems = SrcVT->getNumElements(); + + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && (DstNumElems > 1 || SrcNumElems > 1)) + return false; + // HLSL Change End - allow > 1 vectors where supported. + IRBuilder<> Builder(BCI.getParent(), &BCI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); @@ -609,7 +630,8 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { VectorType *VT = dyn_cast(SVI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -643,7 +665,8 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { bool Scalarizer::visitPHINode(PHINode &PHI) { VectorType *VT = dyn_cast(PHI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -679,6 +702,10 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. IRBuilder<> Builder(LI.getParent(), &LI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); @@ -705,6 +732,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. 
IRBuilder<> Builder(SI.getParent(), &SI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp index 0cb993e6f4..530c791fcc 100644 --- a/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -3713,20 +3713,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); - if (llvm::VectorType *VT = dyn_cast(CondV->getType())) { - llvm::VectorType *ResultVT = cast(LHS->getType()); - llvm::Value *result = llvm::UndefValue::get(ResultVT); - for (unsigned i = 0; i < VT->getNumElements(); i++) { - llvm::Value *EltCond = Builder.CreateExtractElement(CondV, i); - llvm::Value *EltL = Builder.CreateExtractElement(LHS, i); - llvm::Value *EltR = Builder.CreateExtractElement(RHS, i); - llvm::Value *EltSelect = Builder.CreateSelect(EltCond, EltL, EltR); - result = Builder.CreateInsertElement(result, EltSelect, i); - } - return result; - } else { - return Builder.CreateSelect(CondV, LHS, RHS); - } + return Builder.CreateSelect(CondV, LHS, RHS); } if (hlsl::IsHLSLMatType(E->getType())) { llvm::Value *Cond = CGF.EmitScalarExpr(condExpr); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 40010b1596..243471bc55 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6892,6 +6892,9 @@ bool HLSLExternalSource::MatchArguments( } } + std::string profile = m_sema->getLangOpts().HLSLProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName(profile.c_str()); + // Populate argTypes. for (size_t i = 0; i <= Args.size(); i++) { const HLSL_INTRINSIC_ARGUMENT *pArgument = &pIntrinsic->pArgs[i]; @@ -7062,8 +7065,9 @@ bool HLSLExternalSource::MatchArguments( } // Verify that the final results are in bounds. - CAB(uCols > 0 && uCols <= MaxVectorSize && uRows > 0 && - uRows <= MaxVectorSize, + CAB((uCols > 0 && uRows > 0 && + ((uCols <= MaxVectorSize && uRows <= MaxVectorSize) || + (SM->IsSM69Plus() && uRows == 1))), i); // Const diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl new file mode 100644 index 0000000000..12955c87f9 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl @@ -0,0 +1,464 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=2 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=5 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=3 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=9 %s | FileCheck %s + +// Test relevant operators on an assortment bool vector sizes with 6.9 native vectors. +// Bools have a different representation in memory and a smaller set of interesting ops. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] +RWStructuredBuffer< bool > buf; + +groupshared vector gs_vec1, gs_vec2; +groupshared vector gs_vec3; + + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], bool scales[10]) { + + // Another trick to capture the size. + // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.i32 [[res]], 0 + // CHECK: [[bscl:%[0-9]*]] = icmp ne i32 [[scl]], 0 + bool scalar = buf.Load(NUM); + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add9]] + // CHECK: [[bvec9:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec9]], zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + things[0] = things[9]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i1> undef, i1 [[bscl]], i32 0 + // CHECK: [[res:%[0-9]*]] = shufflevector <[[NUM]] x i1> [[spt]], <[[NUM]] x i1> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[res]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + things[5] = scalar; + +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[svec0:%[0-9]*]] = sext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[bsvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[svec0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bsvec0]] to <[[NUM]] x i32> + res[0] = -things[0]; + + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x 
i32> + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + res[1] = +things[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[res2:%[0-9]*]] = add nuw nsw <[[NUM]] x i32> [[vec2]], [[vec1]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[1] + things[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = sub nsw <[[NUM]] x i32> [[vec2]], [[vec3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[2] - things[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = mul nuw nsw <[[NUM]] x i32> [[vec4]], [[vec3]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = things[3] * things[4]; + + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec4]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: [[res5:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = things[4] / things[5]; + + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = {{[ufs]?rem( fast)?}} <[[NUM]] x i32> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; + + // Stores into res[]. Previous were for things[] inout. 
+ // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. +// CHECK-LABEL: define void @"\01?scarithmetic +export vector scarithmetic(inout vector things[10], bool scales[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load i32, i32* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = add <[[NUM]] x i32> [[spt0]], [[vec0]] + // CHECK: [[bres0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = 
zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32> + res[0] = things[0] + scales[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load i32, i32* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = sub <[[NUM]] x i32> [[vec1]], [[spt1]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = things[1] - scales[1]; + + + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load i32, i32* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt2]], [[vec2]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[2] * scales[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load i32, i32* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec3]], [[spt3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load i32, i32* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = add <[[NUM]] x i32> [[spt4]], [[vec4]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load i32, i32* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = 
shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sub <[[NUM]] x i32> [[spt5]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load i32, i32* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt6]], [[vec6]] + // CHECK: [[bres6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res6]], zeroinitializer + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. 
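+
+// Note: with -HV 2018 these logical operators apply elementwise to whole long
+// vectors; the checks below expect plain vector 'or'/'and' IR on the compared
+// i1 vectors with no short-circuiting branches (longvec-operators-shortcircuit.hlsl
+// later in this patch covers that behavior explicitly). A minimal illustrative
+// sketch, kept in comments so it is not compiled as part of this test; the
+// names and the element count 7 are assumptions, not taken from this file:
+//   vector<bool, 7> a, b;
+//   vector<bool, 7> both   = a && b;  // expected to lower to an elementwise 'and'
+//   vector<bool, 7> either = a || b;  // expected to lower to an elementwise 'or'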
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector logic(vector truth[10], vector consequences[10])[10] {
+  vector res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32>
+  res[0] = !truth[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]]
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]]
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // Further IR for the ternary operator is not checked here.
+
+  res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[bvec0]], [[bvec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = icmp {{u?}}ne <[[NUM]] x i1> [[bvec1]], [[bvec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = icmp {{[osu]?}}lt <[[NUM]] x i1> [[bvec2]], [[bvec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres7:%[0-9]*]] = icmp {{[osu]]?}}gt <[[NUM]] x i1> [[bvec3]], [[bvec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres8:%[0-9]*]] = icmp {{[osu]]?}}le <[[NUM]] x i1> [[bvec4]], [[bvec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres9:%[0-9]*]] = icmp {{[osu]?}}ge <[[NUM]] x i1> [[bvec5]], [[bvec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: 
[[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i, bool val)[10] { + vector res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x i32>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> zeroinitializer, <[[NUM]] x i32>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[res2]] + res[Ix] = true; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bthg0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg0]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg0]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg0]], <[[NUM]] x i32>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 %i + // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[addi]] + // CHECK: [[bthgi:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thgi]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%[0-9]*]] = zext <[[NUM]] x i1> [[bthgi]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thgi]], <[[NUM]] x i32>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bthg2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg2]], zeroinitializer + // CHECK: 
[[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg2]], <[[NUM]] x i32>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; + +} + +// Test bit twiddling operators. +// CHECK-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[10]) { + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] = things[2] | things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec4]], [[bvec3]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] = things[3] & things[4]; + + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> 
[[vec6]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = or <[[NUM]] x i1> [[bvec6]], [[bvec4]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] |= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[bvec7:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec7]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = and <[[NUM]] x i1> [[bvec7]], [[bvec5]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + things[5] &= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[bvec8:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec8]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = xor <[[NUM]] x i1> [[bvec6]], [[bvec8]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] ^= things[8]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl new file mode 100644 index 0000000000..b749a3b255 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl @@ -0,0 +1,73 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=uint -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,UNSIG +// RUN: %dxc -T lib_6_9 -DTYPE=int64_t -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,SIG +// RUN: %dxc -T lib_6_9 -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,UNSIG + +// Test bitwise operators on an assortment vector sizes and integer types with 6.9 native vectors. + +// Test bit twiddling operators. 
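+
+// Note on the shift checks further below: the 'and' of the shift amount that
+// precedes each shl/lshr/ashr reflects the shift count being masked to the
+// element bit width (e.g. a mask of 31 for 32-bit elements), which is why the
+// checks expect an 'and' before every shift. A minimal sketch, kept in
+// comments so it is not compiled as part of this test; the names and the uint
+// element type are illustrative assumptions only:
+//   vector<uint, 5> v, amt;
+//   vector<uint, 5> l = v << amt;  // expected IR: 'and' on amt, then shl
+//   vector<uint, 5> r = v >> amt;  // expected IR: 'and' on amt, then lshr (ashr if signed)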
+// CHECK-LABEL: define void @"\01?bittwiddlers +// CHECK-SAME: ([11 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]* +export void bittwiddlers(inout vector things[11]) { + // CHECK: [[adr1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1, + // CHECK: [[adr0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + + // CHECK: [[adr3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]], <[[TYPE]] + // CHECK: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + // 
CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl new file mode 100644 index 0000000000..8b12b96c80 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl @@ -0,0 +1,342 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +struct Interface { + TYPE assigned[10]; + TYPE arithmeticked[11]; + bool logicked[10]; + TYPE indexed[10]; + TYPE scales[10]; +}; + +#if 0 +// Requires vector loading support. Enable when available. +RWStructuredBuffer Input; +RWStructuredBuffer Output; + +TYPE g_val; + +[shader("compute")] +[numthreads(8,1,1)] +void main(uint GI : SV_GroupIndex) { + assignments(Output[GI].assigned, Input[GI].scales); + Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); + Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); + Output[GI].indexed = index(Input[GI].indexed, GI, g_val); +} +#endif + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{(8|4|2)}}) + // CHECK: [[res0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val1]], [[val5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast| nsw)?]] [[TYPE]] [[val2]], [[val6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast| nsw)?]] [[TYPE]] [[val3]], [[val7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast| nsw)?]] [[TYPE]] [[val4]], [[val8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] +#ifdef DBL + // DBL: [[fvec9:%.*]] = fptrunc double [[val9]] to float + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // DBL: [[fres5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] float [[fvec5]], [[fvec9]] + // DBL: [[res5:%.*]] = fpext float [[fres5]] to double + float f9 = things[9]; + float f5 = things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res1:%.*]] = [[SUB]] [[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000)}}, [[res0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] +#ifdef DBL + // DBL: [[fvec6:%.*]] = fptrunc double [[val6]] to float + // DBL: [[fres6:%.*]] = [[REM]] float [[fvec5]], [[fvec6]] + // DBL: [[res6:%.*]] = fpext float [[fres6]] to double + res[6] = (float)things[5] % (float)things[6]; +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val7]], {{(1|1\.?0*e?\+?0*|0xH3C00)}} + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[val8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[val9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[val10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[val10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x 
[[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // CHECK: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export bool logic(bool truth[10], TYPE consequences[10])[10] {
+  bool res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]]
+  // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[bvec2:%.*]] = icmp ne i32 [[val2]], 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bvec3:%.*]] = icmp ne i32 [[val3]], 0
+  // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]]
+  // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[bvec4:%.*]] = icmp ne i32 [[val4]], 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[bvec5:%.*]] = icmp ne i32 [[val5]], 0
+  // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]]
+  // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  res[3] = truth[3] ? truth[4] : truth[5];
+
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]]
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]]
+  // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast| nsw)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]]
+  // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]]
+  // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]]
+  // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]]
+  // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]]
+  // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32
+  res[6] = consequences[2] < consequences[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]]
+  // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[val3]], [[val4]]
+  // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32
+  res[7] = consequences[3] > consequences[4];
+
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]]
+  // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[val4]], [[val5]]
+  // 
CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]] + TYPE res[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 %i + // CHECK: store [[TYPE]] {{(1|1\.?0*e?\+?0*|0xH3C00)}}, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[TYPE]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* 
%things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl new file mode 100644 index 0000000000..cb2fd5f781 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl @@ -0,0 +1,57 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s --check-prefix=NOBR + +// Test that no short-circuiting takes place for logic ops with native vectors. +// First run verifies that side effects result in stores. +// Second runline just makes sure there are no branches nor phis at all. + +// NOBR-NOT: br i1 +// NOBR-NOT: = phi + +export int4 logic(inout bool4 truth[5], inout int4 consequences[4]) { + // CHECK: [[adr0:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[add:%.*]] = add <4 x i32> [[vec1]], + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[bres3:%.*]] = or <4 x i1> [[bvec1]], [[bvec0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext <4 x i1> [[bres3]] to <4 x i32> + // CHECK: store <4 x i32> [[res3]], <4 x i32>* [[adr3]] + truth[3] = truth[0] || consequences[1]++; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec0]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + // CHECK: [[bres4:%.*]] = and <4 x i1> [[bvec0]], [[bvec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <4 x i1> [[bres4]] to <4 x i32> + // CHECK: store <4 x i32> [[res4]], <4 x i32>* [[adr4]] + truth[4] = truth[1] && consequences[0]--; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[bcond:%.*]] = icmp ne <4 x i32> [[vec2]], zeroinitializer + // CHECK: [[adr2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[add:%.*]] = add <4 x i32> %25, + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* 
%consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <4 x i32>, <4 x i32>* [[adr3]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec3]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr3]] + // CHECK: [[res:%.*]] = select <4 x i1> [[bcond]], <4 x i32> [[vec2]], <4 x i32> [[vec3]] + int4 res = truth[2] ? consequences[2]++ : consequences[3]--; + + // CHECK: ret <4 x i32> %30 + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl new file mode 100644 index 0000000000..c366261406 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -0,0 +1,479 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] +// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +struct Interface { + TYPE assigned[10]; + TYPE arithmeticked[11]; + bool logicked[10]; + TYPE indexed[10]; + TYPE scales[10]; +}; + +#if 0 +// Requires vector loading support. Enable when available. +RWStructuredBuffer Input; +RWStructuredBuffer Output; + +TYPE g_val; + +[shader("compute")] +[numthreads(8,1,1)] +void main(uint GI : SV_GroupIndex) { + assignments(Output[GI].assigned, Input[GI].scales); + Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); + Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); + Output[GI].indexed = index(Input[GI].indexed, GI, g_val); +} +#endif + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
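+
+// Note: the vector<T, 1> cases below still lower to one-element LLVM vectors,
+// so unlike the plain scalar tests the checks expect an extractelement before
+// each scalar op and an insertelement to rebuild the <1 x T> value before it
+// is stored. A minimal sketch, kept in comments so it is not compiled as part
+// of this test; the float1 element type is just an illustrative choice:
+//   float1 a, b;       // float1 is shorthand for vector<float, 1>
+//   float1 c = a + b;  // expected IR: extractelement x2, fadd, insertelement, store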
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[res0:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[val0]], i64 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[add1:%.*]] = [[ADD:f?add( fast)?]] [[ELTY]] [[val1]], [[val5]] + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add1]], i32 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[sub2:%.*]] = [[SUB:f?sub( fast)?]] [[ELTY]] [[val2]], [[val6]] + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub2]], i32 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[mul3:%.*]] = [[MUL:f?mul( fast)?]] [[ELTY]] [[val3]], [[val7]] + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul3]], i32 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[div4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[ELTY]] [[val4]], [[val8]] + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div4]], i32 0 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + 
// NODBL: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]] + // NODBL: [[rem5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[ELTY]] [[val5]], [[val9]] + // NODBL: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem5]], i32 0 + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 + // CHECK: [[sub1:%.*]] = [[SUB]] [[ELTY]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[add2:%.*]] = [[ADD]] [[ELTY]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[sub3:%.*]] = [[SUB]] [[ELTY]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[mul4:%.*]] = [[MUL]] [[ELTY]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[div5:%.*]] = [[DIV]] [[ELTY]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]] + // NODBL: [[rem6:%.*]] = [[REM]] [[ELTY]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[add7:%.*]] = [[ADD]] [[ELTY]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add7]], i32 0 + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[add8:%.*]] = [[ADD]] [[ELTY]] 
[[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + // CHECK: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add8]], i32 0 + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // CHECK: [[add9:%.*]] = [[ADD]] [[ELTY]] [[val9]], [[POS1]] + // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i32 0 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[val10:%.*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // CHECK: [[add10:%.*]] = [[ADD]] [[ELTY]] [[val10]], [[NEG1]] + // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i32 0 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub1]], i64 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add2]], i64 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub3]], i64 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul4]], i64 0 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div5]], i64 0 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // NODBL: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem6]], i64 0 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[ld7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. 
+  // CHECK: store [[TYPE]] [[ld8]], [[TYPE]]* [[adr8]]
+  // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9
+  // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i64 0
+  // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]]
+  // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10
+  // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i64 0
+  // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]]
+  // CHECK: ret void
+  return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export bool logic(bool truth[10], TYPE consequences[10])[10] {
+  bool res[10];
+  // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0
+  // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]]
+  // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1
+  res[0] = !truth[0];
+
+  // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1
+  // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]]
+  // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2
+  // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]]
+  // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]]
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[bval2:%.*]] = icmp ne i32 [[val2]], 0
+  // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3
+  // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]]
+  // CHECK: [[bval3:%.*]] = icmp ne i32 [[val3]], 0
+  // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]]
+  // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4
+  // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]]
+  // CHECK: [[bval4:%.*]] = icmp ne i32 [[val4]], 0
+  // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5
+  // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]]
+  // CHECK: [[bval5:%.*]] = icmp ne i32 [[val5]], 0
+  // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]]
+  // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32
+  res[3] = truth[3] ?
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[ld0]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[ELTY]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[ELTY]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[ELTY]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[ELTY]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[ELTY]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[ELTY]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x 
i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] + TYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 + // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 %i + // CHECK: store [[ELTY]] [[POS1]], [[ELTY]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 2 + // CHECK: store [[ELTY]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[ELTY]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%.*]] = extractelement [[TYPE]] [[ld0]], i64 0 + // CHECK: store [[ELTY]] [[thg0]], [[ELTY]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[ldi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%.*]] = extractelement [[TYPE]] [[ldi]], i64 0 + // CHECK: store [[ELTY]] [[thgi]], [[ELTY]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%.*]] = extractelement [[TYPE]] [[ld2]], i64 0 + // CHECK: store [[ELTY]] [[thg2]], [[ELTY]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
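+// Note (illustrative sketch, not itself a checked requirement): the shift cases below expect an
+// `and` on the shift amount before the shl/ashr/lshr, i.e. the count is masked, roughly:
+//   things[4] = things[5] << (things[6] & (BITS - 1));
+// where BITS stands in for the element bit width of TYPE.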
+// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout TYPE things[13]) { + // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 + // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // INT: [[xor1:%[0-9]*]] = xor [[ELTY]] [[val1]], -1 + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor1]], i32 0 + // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr0]] + things[0] = ~things[1]; + + // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 2 + // INT: [[ld2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // INT: [[val2:%[0-9]*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 3 + // INT: [[ld3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // INT: [[val3:%[0-9]*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // INT: [[or1:%[0-9]*]] = or [[ELTY]] [[val3]], [[val2]] + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or1]], i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] = things[2] | things[3]; + + // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 4 + // INT: [[ld4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // INT: [[val4:%[0-9]*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // INT: [[and2:%[0-9]*]] = and [[ELTY]] [[val4]], [[val3]] + // INT: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and2]], i32 0 + // INT: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] = things[3] & things[4]; + + // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 5 + // INT: [[ld5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // INT: [[val5:%[0-9]*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // INT: [[xor3:%[0-9]*]] = xor [[ELTY]] [[val5]], [[val4]] + // INT: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor3]], i32 0 + // INT: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] = things[4] ^ things[5]; + + // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 6 + // INT: [[ld6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // INT: [[val6:%[0-9]*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // INT: [[shv6:%[0-9]*]] = and [[ELTY]] [[val6]] + // INT: [[shl4:%[0-9]*]] = shl [[ELTY]] [[val5]], [[shv6]] + // INT: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl4]], i32 0 + // INT: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] = things[5] << things[6]; + + // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 7 + // INT: [[ld7:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // INT: [[val7:%[0-9]*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // INT: [[shv7:%[0-9]*]] = and [[ELTY]] [[val7]] + // UNSIG: [[shr5:%[0-9]*]] = lshr [[ELTY]] [[val6]], [[shv7]] + // SIG: [[shr5:%[0-9]*]] = ashr [[ELTY]] [[val6]], [[shv7]] + // INT: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr5]], i32 0 + // INT: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] = things[6] >> things[7]; + + // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 8 + // INT: 
[[ld8:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // INT: [[val8:%[0-9]*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // INT: [[or6:%[0-9]*]] = or [[ELTY]] [[val8]], [[val6]] + // INT: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or6]], i32 0 + // INT: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // INT: [[val9:%[0-9]*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // INT: [[and7:%[0-9]*]] = and [[ELTY]] [[val9]], [[val7]] + // INT: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and7]], i32 0 + // INT: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 10 + // INT: [[ld10:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // INT: [[val10:%[0-9]*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // INT: [[xor8:%[0-9]*]] = xor [[ELTY]] [[val10]], [[val8]] + // INT: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor8]], i32 0 + // INT: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr11]] + // INT: [[val11:%[0-9]*]] = extractelement [[TYPE]] [[ld11]], i32 0 + // INT: [[shv11:%[0-9]*]] = and [[ELTY]] [[val11]] + // INT: [[shl9:%[0-9]*]] = shl [[ELTY]] [[val9]], [[shv11]] + // INT: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl9]], i32 0 + // INT: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr12]] + // INT: [[val12:%[0-9]*]] = extractelement [[TYPE]] [[ld12]], i32 0 + // INT: [[shv12:%[0-9]*]] = and [[ELTY]] [[val12]] + // UNSIG: [[shr10:%[0-9]*]] = lshr [[ELTY]] [[val10]], [[shv12]] + // SIG: [[shr10:%[0-9]*]] = ashr [[ELTY]] [[val10]], [[shv12]] + // INT: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr10]], i32 0 + // INT: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl new file mode 100644 index 0000000000..ed7a2bff25 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -0,0 +1,581 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=8 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 
2018 -T lib_6_9 -DTYPE=float -DNUM=10 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=11 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=12 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=13 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=14 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=15 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=16 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=18 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=128 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Less exhaustive testing for some other types.
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=177 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG
+
+// Test relevant operators on an assortment of vector sizes and types with 6.9 native vectors.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// Uses a non-vector buffer to avoid interacting with that implementation.
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]]
+
+RWStructuredBuffer< TYPE > buf;
+
+export void assignments(inout vector<TYPE, NUM> things[10], TYPE scales[10]);
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11];
+export vector<TYPE, NUM> scarithmetic(inout vector<TYPE, NUM> things[10], TYPE scales[10])[10];
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10];
+export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i, TYPE val)[10];
+
+struct Interface {
+  vector<TYPE, NUM> assigned[10];
+  vector<TYPE, NUM> arithmeticked[11];
+  vector<TYPE, NUM> scarithmeticked[10];
+  vector<bool, NUM> logicked[10];
+  vector<TYPE, NUM> indexed[10];
+  TYPE scales[10];
+};
+
+#if 0
+// Requires vector loading support. Enable when available.
+RWStructuredBuffer<Interface> Input;
+RWStructuredBuffer<Interface> Output;
+
+TYPE g_val;
+
+[shader("compute")]
+[numthreads(8,1,1)]
+void main(uint GI : SV_GroupIndex) {
+  assignments(Output[GI].assigned, Input[GI].scales);
+  Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked);
+  Output[GI].scarithmeticked = scarithmetic(Input[GI].scarithmeticked, Input[GI].scales);
+  Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned);
+  Output[GI].indexed = index(Input[GI].indexed, GI, g_val);
+}
+#endif
+
+// A mixed-type overload to test overload resolution and mingle different vector element types in ops
+// Test assignment operators.
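+// Note (illustrative): with -DTYPE=float -DNUM=6, the captures above and in assignments() below
+// bind roughly
+//   [[TY]] = f32, [[TYPE]] = float, [[NUM]] = 6
+// so a later `store <[[NUM]] x [[TYPE]]>` pattern matches `store <6 x float>` without
+// hard-coding the width per RUN line.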
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout vector<TYPE, NUM> things[10], TYPE scales[10]) {
+
+  // Another trick to capture the size.
+  // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]]
+  // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.[[TY]] [[res]], 0
+  TYPE scalar = buf.Load(NUM);
+
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl]], i32 0
+  // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]]
+  things[0] = scalar;
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]]
+  things[1] += things[5];
+
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]]
+  things[2] -= things[6];
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]]
+  things[3] *= things[7];
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]]
+  things[4] /= things[8];
+
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]]
+#ifdef DBL
+  // DBL can't use remainder operator, do something anyway to keep the rest consistent.
+  // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float>
+  // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float>
+  // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]]
+  // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double>
+  vector<float, NUM> f9 = things[9];
+  vector<float, NUM> f5 = things[5];
+  f5 %= f9;
+  things[5] = f5;
+#else
+  // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]]
+  things[5] %= things[9];
+#endif
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]]
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1
+  // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0
+  // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]]
+  things[6] += scales[1];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2
+  // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0
+  // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]]
+  things[7] -= scales[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3
+  // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0
+  // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]]
+  things[8] *= scales[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4
+  // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0
+  // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]]
+  things[9] /= scales[4];
+
+}
+
+// Test arithmetic operators.
+// CHECK-LABEL: define void @"\01?arithmetic
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] {
+  vector<TYPE, NUM> res[11];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[res1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]>
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  res[0] = -things[0];
+  res[1] = +things[0];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]]
+  res[2] = things[1] + things[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  res[3] = things[2] - things[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]]
+  res[4] = things[3] * things[4];
+
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  res[5] = things[4] / things[5];
+
+  // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float>
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+#ifdef DBL
+  // DBL can't use remainder operator, do something anyway to keep the rest consistent.
+  // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float>
+  // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]]
+  // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double>
+  res[6] = (vector<float, NUM>)things[5] % (vector<float, NUM>)things[6];
+#else
+  // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  res[6] = things[5] % things[6];
+#endif
+
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]]
+  // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]]
+  res[7] = things[7]++;
+
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]]
+  // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]]
+  res[8] = things[8]--;
+
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+  // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]]
+  // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]]
+  res[9] = ++things[9];
+
+  // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+  // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add10]]
+  // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]]
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]]
+  res[10] = --things[10];
+
+  // Stores into res[]. Previous were for things[] inout.
+ // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // These two were post ops, so the original value goes into res[]. + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 10 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
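+// Note (illustrative sketch, not itself checked): mixing a scalar with a vector splats the scalar
+// to the vector width first, which is what the insertelement/shufflevector pairs below verify.
+// In HLSL terms, roughly:
+//   vector<TYPE, NUM> splat = scales[0]; // broadcast one scalar to all NUM lanes
+//   res[0] = things[0] + splat;          // then a plain elementwise vector add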
+// CHECK-LABEL: define void @"\01?scarithmetic
+export vector<TYPE, NUM> scarithmetic(inout vector<TYPE, NUM> things[10], TYPE scales[10])[10] {
+  vector<TYPE, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]]
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]]
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 0
+  // CHECK: [[scl0:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add0]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0
+  // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]]
+  res[0] = things[0] + scales[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1
+  // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0
+  // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]]
+  res[1] = things[1] - scales[1];
+
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2
+  // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0
+  // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+  // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]]
+  res[2] = things[2] * scales[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3
+  // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]]
+  // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0
+  // CHECK:
[[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. 
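+// Note (illustrative, not itself checked): bool vectors are stored as <N x i32>, so the checks
+// below look for the usual lowering pattern rather than direct i1 storage, roughly:
+//   %b = icmp ne <N x i32> %v, zeroinitializer   ; i32 memory form -> i1 bits
+//   ... logical op on <N x i1> ...
+//   %r = zext <N x i1> %b to <N x i32>           ; back to the i32 memory form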
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+  // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]]
+  // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+  // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer
+  // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32>
+  res[0] = !truth[0];
+
+  // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+  // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]]
+  // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+  // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]]
+  // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+  // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]]
+  // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+  res[1] = truth[1] || truth[2];
+
+  // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3
+  // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer
+  // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]]
+  // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32>
+  res[2] = truth[2] && truth[3];
+
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4
+  // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5
+  // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer
+  // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]]
+  // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32>
+  res[3] = truth[3] ?
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // 
CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3
+  // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]]
+  // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4
+  // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]]
+  // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5
+  // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]]
+  // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6
+  // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]]
+  // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7
+  // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]]
+  // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8
+  // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]]
+  // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9
+  // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]]
+  // CHECK: ret void
+
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators
+// CHECK-LABEL: define void @"\01?index
+export vector<TYPE, NUM> index(vector<TYPE, NUM> things[10], int i, TYPE val)[10] {
+  vector<TYPE, NUM> res[10];
+
+  // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x [[TYPE]]>]
+  // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0
+  // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]]
+  res[0] = 0;
+
+  // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 %i
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|0xH3C00).*}}>, <[[NUM]] x [[TYPE]]>* [[resi]]
+  res[i] = 1;
+
+  // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2
+  // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|0xH4000).*}}>, <[[NUM]] x [[TYPE]]>* [[res2]]
+  res[Ix] = 2;
+
+  // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]]
+  // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]]
+  res[3] = things[0];
+
+  // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 %i
+  // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[addi]]
+  // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]]
+  res[4] = things[i];
+
+  // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]]
+  // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5
+  // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]]
+  res[5] = things[Ix];
+  // CHECK: ret void
+  return res;
+}
+
+#ifdef INT
+// Test bit twiddling operators.
+// INT-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout vector<TYPE, NUM> things[13]) {
+  // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+  // INT: [[ld1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+  // INT: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld1]], <[[TYPE]] -1
+  // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+  // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]]
+  things[0] = ~things[1];
+
+  // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+  // INT: [[ld2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+  // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+  // INT: [[ld3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+  // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld3]], [[ld2]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]]
+  things[1] = things[2] | things[3];
+
+  // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+  // INT: [[ld4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+  // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld4]], [[ld3]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+  things[2] = things[3] & things[4];
+
+  // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+  // INT: [[ld5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+  // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld4]], [[ld5]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+  things[3] = things[4] ^ things[5];
+
+  // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+  // INT: [[ld6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+  // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld6]]
+  // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld5]], [[shv6]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+  things[4] = things[5] << things[6];
+
+  // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+  // INT: [[ld7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+  // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld7]]
+  // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]]
+  // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]]
+  // INT: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+  things[5] = things[6] >> things[7];
+
+  // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+  // INT:
[[ld8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld8]], [[ld6]] + // INT: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld9]], [[ld7]] + // INT: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // INT: [[ld10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld8]], [[ld10]] + // INT: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr11]] + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld9]], [[shv11]] + // INT: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr12]] + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // INT: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll new file mode 100644 index 0000000000..987f997a2a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll @@ -0,0 +1,269 @@ +; RUN: %dxopt %s -dynamic-vector-to-array,ReplaceAllVectors=0 -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } + +; Vec2s should be preserved. +; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 + +; Dynamic Vec1s should be reduced. 
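+; For illustration (this sketch is not itself checked): dynamic-vector-to-array
+; rewrites a dynamically indexed <1 x float> global such as @dyglob1 into the
+; [1 x float] array @dyglob1.v, so a vector GEP like
+;   getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %ix
+; is expected to become the equivalent array GEP
+;   getelementptr [1 x float], [1 x float]* @dyglob1.v, i32 0, i32 %ix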
+; CHECK-DAG: @dygar1.v = internal global [2 x [1 x float]] zeroinitializer, align 4
+; CHECK-DAG: @dygrec1.0.v = internal global [1 x float] zeroinitializer, align 4
+; CHECK-DAG: @dyglob1.v = internal global [1 x float] zeroinitializer, align 4
+
+; These statically-accessed Vec1s were already reduced by SROA.
+; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4
+; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4
+; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4
+
+@dyglob1 = internal global <1 x float> zeroinitializer, align 4
+@dyglob2 = internal global <2 x float> zeroinitializer, align 4
+@stglob2 = internal global <2 x float> zeroinitializer, align 4
+@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4
+@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4
+@dygrec2.0 = internal global <2 x float> zeroinitializer, align 4
+@stgrec2.0 = internal global <2 x float> zeroinitializer, align 4
+@stgar1.0 = internal global [2 x float] zeroinitializer, align 4
+@dygrec1.0 = internal global <1 x float> zeroinitializer, align 4
+@stglob1.0 = internal global float 0.000000e+00, align 4
+@stgrec1.0.0 = internal global float 0.000000e+00, align 4
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define <4 x float> @"\01?tester
+define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 {
+bb:
+ ; Vec2s are preserved.
+ ; CHECK-DAG: %dyloc2 = alloca <2 x float>
+ ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>]
+ ; CHECK-DAG: %dylorc2.0 = alloca <2 x float>
+
+ ; CHECK-DAG: %stloc2 = alloca <2 x float>
+ ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>]
+ ; CHECK-DAG: %stlorc2.0 = alloca <2 x float>
+
+ ; Static vec1s are unaltered by dynamic vector to array.
+ ; CHECK-DAG: %stloc1 = alloca <1 x float>
+ ; CHECK-DAG: %stlar1.0 = alloca [3 x float]
+ ; CHECK-DAG: %stlorc1.0 = alloca <1 x float>
+
+ ; Dynamic vec1s are removed and lose their names.
+ ; CHECK-DAG: alloca [1 x float] + ; CHECK-DAG: alloca [3 x [1 x float]] + ; CHECK-DAG: alloca [1 x float] + + %dylorc1.0 = alloca <1 x float> + %stlorc1.0 = alloca <1 x float> + %dylorc2.0 = alloca <2 x float> + %stlorc2.0 = alloca <2 x float> + %stlar1.0 = alloca [3 x float] + %tmp = alloca i32, align 4 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + store i32 %ix, i32* %tmp, align 4 + + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb76 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float %tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* %tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp32 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30, i32 %tmp31 ; line:56 col:20 + store float %tmp29, float* %tmp32 ; line:56 col:35 + %tmp33 = getelementptr inbounds [3 x float], [3 x float]* %stlar1.0, i32 0, i32 1 ; line:56 col:5 + store float %tmp29, float* %tmp33 ; line:56 col:18 + %tmp34 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp35 = load float, float* %tmp34, align 4 ; line:57 col:37 + %tmp36 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp37 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp38 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp36, i32 %tmp37 ; line:57 col:20 + store float %tmp35, float* %tmp38 ; line:57 col:35 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1, i32 0 ; line:57 col:5 + store float %tmp35, float* %tmp39 ; line:57 col:18 + %tmp40 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp41 = load float, float* %tmp40, align 4 ; line:58 col:36 + %tmp42 = load i32, i32* %tmp, align 4 ; line:58 col:30 + %tmp43 = getelementptr inbounds <1 x float>, <1 x float>* %dylorc1.0, i32 0, i32 %tmp42 ; line:58 col:20 + store float %tmp41, float* %tmp43 
; line:58 col:34 + %tmp44 = getelementptr inbounds <1 x float>, <1 x float>* %stlorc1.0, i32 0, i32 0 ; line:58 col:5 + store float %tmp41, float* %tmp44 ; line:58 col:18 + %tmp45 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp46 = load float, float* %tmp45, align 4 ; line:59 col:36 + %tmp47 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp48 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp47 ; line:59 col:20 + store float %tmp46, float* %tmp48 ; line:59 col:34 + %tmp49 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:59 col:5 + store float %tmp46, float* %tmp49 ; line:59 col:18 + %tmp50 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp51 = load float, float* %tmp50, align 4 ; line:61 col:32 + %tmp52 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp53 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp52 ; line:61 col:18 + store float %tmp51, float* %tmp53 ; line:61 col:30 + store float %tmp51, float* @stglob1.0 ; line:61 col:16 + %tmp54 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp55 = load float, float* %tmp54, align 4 ; line:62 col:32 + %tmp56 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp57 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp56 ; line:62 col:18 + store float %tmp55, float* %tmp57 ; line:62 col:30 + store float %tmp55, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp59 = load float, float* %tmp58, align 4 ; line:63 col:37 + %tmp60 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp61 = load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp62 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp60, i32 %tmp61 ; line:63 col:20 + store float %tmp59, float* %tmp62 ; line:63 col:35 + store float %tmp59, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 1) ; line:63 col:18 + %tmp63 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp64 = load float, float* %tmp63, align 4 ; line:64 col:37 + %tmp65 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp66 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp67 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp65, i32 %tmp66 ; line:64 col:20 + store float %tmp64, float* %tmp67 ; line:64 col:35 + store float %tmp64, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp68 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp69 = load float, float* %tmp68, align 4 ; line:65 col:36 + %tmp70 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp71 = getelementptr inbounds <1 x float>, <1 x float>* @dygrec1.0, i32 0, i32 %tmp70 ; line:65 col:20 + store float %tmp69, float* %tmp71 ; line:65 col:34 + store float %tmp69, float* @stgrec1.0.0 ; line:65 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp73 = load float, float* %tmp72, align 4 ; line:66 col:36 + %tmp74 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp75 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp74 ; line:66 
col:20 + store float %tmp73, float* %tmp75 ; line:66 col:34 + store float %tmp73, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:66 col:18 + br label %bb76 ; line:67 col:3 + +bb76: ; preds = %bb17, %bb + %tmp77 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17 + %tmp78 = extractelement <1 x float> %tmp77, i32 0 ; line:68 col:17 + %tmp79 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp80 = extractelement <2 x float> %tmp79, i32 1 ; line:68 col:27 + %tmp81 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp82 = extractelement <1 x float> %tmp81, i32 0 ; line:68 col:37 + %tmp83 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp84 = extractelement <2 x float> %tmp83, i32 1 ; line:68 col:47 + %tmp85 = insertelement <4 x float> undef, float %tmp78, i64 0 ; line:68 col:16 + %tmp86 = insertelement <4 x float> %tmp85, float %tmp80, i64 1 ; line:68 col:16 + %tmp87 = insertelement <4 x float> %tmp86, float %tmp82, i64 2 ; line:68 col:16 + %tmp88 = insertelement <4 x float> %tmp87, float %tmp84, i64 3 ; line:68 col:16 + %tmp89 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp90 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp91 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp89, i32 %tmp90 ; line:68 col:66 + %tmp92 = load float, float* %tmp91 ; line:68 col:66 + %tmp93 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp94 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp95 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp93, i32 %tmp94 ; line:68 col:82 + %tmp96 = load float, float* %tmp95 ; line:68 col:82 + %tmp97 = getelementptr [3 x float], [3 x float]* %stlar1.0, i32 0, i32 0 ; line:68 col:98 + %load = load float, float* %tmp97 ; line:68 col:98 + %insert = insertelement <1 x float> undef, float %load, i64 0 ; line:68 col:98 + %tmp98 = extractelement <1 x float> %insert, i32 0 ; line:68 col:98 + %tmp99 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp100 = load <2 x float>, <2 x float>* %tmp99, align 4 ; line:68 col:111 + %tmp101 = extractelement <2 x float> %tmp100, i32 1 ; line:68 col:111 + %tmp102 = insertelement <4 x float> undef, float %tmp92, i64 0 ; line:68 col:65 + %tmp103 = insertelement <4 x float> %tmp102, float %tmp96, i64 1 ; line:68 col:65 + %tmp104 = insertelement <4 x float> %tmp103, float %tmp98, i64 2 ; line:68 col:65 + %tmp105 = insertelement <4 x float> %tmp104, float %tmp101, i64 3 ; line:68 col:65 + %tmp106 = fadd <4 x float> %tmp88, %tmp105 ; line:68 col:57 + %tmp107 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp108 = extractelement <1 x float> %tmp107, i32 0 ; line:69 col:10 + %tmp109 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp110 = extractelement <2 x float> %tmp109, i32 1 ; line:69 col:21 + %load3 = load float, float* @stglob1.0 ; line:69 col:32 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 ; line:69 col:32 + %tmp111 = extractelement <1 x float> %insert4, i32 0 ; line:69 col:32 + %tmp112 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp113 = extractelement <2 x float> %tmp112, i32 1 ; line:69 col:43 + %tmp114 = insertelement <4 x float> undef, float %tmp108, i64 0 ; line:69 col:9 + %tmp115 = insertelement <4 x float> %tmp114, float %tmp110, i64 1 ; line:69 col:9 + %tmp116 = insertelement <4 x 
float> %tmp115, float %tmp111, i64 2 ; line:69 col:9 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp113, i64 3 ; line:69 col:9 + %tmp118 = fadd <4 x float> %tmp106, %tmp117 ; line:68 col:124 + %tmp119 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp120 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp121 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp119, i32 %tmp120 ; line:69 col:63 + %tmp122 = load float, float* %tmp121 ; line:69 col:63 + %tmp123 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp124 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp125 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp123, i32 %tmp124 ; line:69 col:79 + %tmp126 = load float, float* %tmp125 ; line:69 col:79 + %load1 = load float, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 0) ; line:69 col:95 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 ; line:69 col:95 + %tmp127 = extractelement <1 x float> %insert2, i32 0 ; line:69 col:95 + %tmp128 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp129 = extractelement <2 x float> %tmp128, i32 1 ; line:69 col:108 + %tmp130 = insertelement <4 x float> undef, float %tmp122, i64 0 ; line:69 col:62 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp126, i64 1 ; line:69 col:62 + %tmp132 = insertelement <4 x float> %tmp131, float %tmp127, i64 2 ; line:69 col:62 + %tmp133 = insertelement <4 x float> %tmp132, float %tmp129, i64 3 ; line:69 col:62 + %tmp134 = fadd <4 x float> %tmp118, %tmp133 ; line:69 col:54 + %tmp135 = load <1 x float>, <1 x float>* %stlorc1.0, align 4 ; line:70 col:20 + %tmp136 = extractelement <1 x float> %tmp135, i64 0 ; line:70 col:11 + %tmp137 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:70 col:23 + %tmp138 = load float, float* %tmp137 ; line:70 col:23 + %tmp139 = load <1 x float>, <1 x float>* %dylorc1.0, align 4 ; line:70 col:45 + %tmp140 = extractelement <1 x float> %tmp139, i64 0 ; line:70 col:11 + %tmp141 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp142 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp141 ; line:70 col:48 + %tmp143 = load float, float* %tmp142 ; line:70 col:48 + %tmp144 = insertelement <4 x float> undef, float %tmp136, i64 0 ; line:70 col:11 + %tmp145 = insertelement <4 x float> %tmp144, float %tmp138, i64 1 ; line:70 col:11 + %tmp146 = insertelement <4 x float> %tmp145, float %tmp140, i64 2 ; line:70 col:11 + %tmp147 = insertelement <4 x float> %tmp146, float %tmp143, i64 3 ; line:70 col:11 + %tmp148 = fadd <4 x float> %tmp134, %tmp147 ; line:69 col:121 + %load5 = load float, float* @stgrec1.0.0 ; line:70 col:80 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 ; line:70 col:80 + %tmp149 = extractelement <1 x float> %insert6, i64 0 ; line:70 col:71 + %tmp150 = load float, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:70 col:83 + %tmp151 = load <1 x float>, <1 x float>* @dygrec1.0, align 4 ; line:70 col:105 + %tmp152 = extractelement <1 x float> %tmp151, i64 0 ; line:70 col:71 + %tmp153 = load i32, i32* %tmp, align 4 ; line:70 col:118 + %tmp154 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp153 ; line:70 col:108 + %tmp155 = load float, float* %tmp154 ; line:70 col:108 + %tmp156 = insertelement <4 x float> 
undef, float %tmp149, i64 0 ; line:70 col:71 + %tmp157 = insertelement <4 x float> %tmp156, float %tmp150, i64 1 ; line:70 col:71 + %tmp158 = insertelement <4 x float> %tmp157, float %tmp152, i64 2 ; line:70 col:71 + %tmp159 = insertelement <4 x float> %tmp158, float %tmp155, i64 3 ; line:70 col:71 + %tmp160 = fadd <4 x float> %tmp148, %tmp159 ; line:70 col:63 + ret <4 x float> %tmp160 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!dx.version = !{!3} +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll new file mode 100644 index 0000000000..95a64a17d4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll @@ -0,0 +1,324 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Test for SROA reduction of globals and allocas. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } +%ConstantBuffer = type opaque + +; Confirm that the dynamic globals are untouched and the statics are scalarized. +; DAG used to preserve the convenient ordering. + +; Dynamic access preserves even vec1s in SROA. +; CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec1.0 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Having >1 elements preserves even statically-accessed vec2s. +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Statically-accessed vec1s should get scalarized. +; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4 +; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +@dyglob2 = internal global <2 x float> zeroinitializer, align 4 +@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 +@dyglob1 = internal global <1 x float> zeroinitializer, align 4 +@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@stglob2 = internal global <2 x float> zeroinitializer, align 4 +@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +@stglob1 = internal global <1 x float> zeroinitializer, align 4 +@stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 { +bb: + ; Dynamic access preserves even vec1s in SROA. 
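+ ; (The element index is only known at runtime, so SROA keeps these as
+ ; <1 x float> vectors rather than reducing them to scalars; the companion
+ ; dynamic-vector-to-array test covers lowering them afterwards.)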
+ ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylar1 = alloca [3 x <1 x float>] + ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>] + + ; SROA doesn't reduce non-array allocas because scalarizer should get them. + ; CHECK-DAG: %stlorc1.0 = alloca <1 x float> + ; CHECK-DAG: %stlorc2.0 = alloca <2 x float> + ; CHECK-DAG: %stloc1 = alloca <1 x float>, align 4 + ; CHECK-DAG: %stloc2 = alloca <2 x float>, align 4 + + ; Statically-accessed arrays should get reduced. + ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %stlar1.0 = alloca [3 x float] + + %tmp = alloca i32, align 4, !dx.temp !14 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %dylorc1 = alloca %struct.VectRec1, align 4 + %dylorc2 = alloca %struct.VectRec2, align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar1 = alloca [3 x <1 x float>], align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + %stlorc1 = alloca %struct.VectRec1, align 4 + %stlorc2 = alloca %struct.VectRec2, align 4 + + store i32 %ix, i32* %tmp, align 4 + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb86 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float %tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* %tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30 ; line:56 col:20 + %tmp32 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp33 = getelementptr <1 x float>, <1 x float>* %tmp31, i32 0, i32 %tmp32 ; line:56 col:20 + store float %tmp29, float* %tmp33 ; line:56 col:35 + %tmp34 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 1 ; line:56 col:5 + %tmp35 = getelementptr <1 x float>, <1 x float>* %tmp34, i32 0, i32 0 ; line:56 col:5 + store float %tmp29, float* %tmp35 ; line:56 col:18 + %tmp36 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp37 = load float, 
float* %tmp36, align 4 ; line:57 col:37 + %tmp38 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp38 ; line:57 col:20 + %tmp40 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp41 = getelementptr <2 x float>, <2 x float>* %tmp39, i32 0, i32 %tmp40 ; line:57 col:20 + store float %tmp37, float* %tmp41 ; line:57 col:35 + %tmp42 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1 ; line:57 col:5 + %tmp43 = getelementptr <2 x float>, <2 x float>* %tmp42, i32 0, i32 0 ; line:57 col:5 + store float %tmp37, float* %tmp43 ; line:57 col:18 + %tmp44 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp45 = load float, float* %tmp44, align 4 ; line:58 col:36 + %tmp46 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:58 col:28 + %tmp47 = load i32, i32* %tmp, align 4 ; line:58 col:30 + %tmp48 = getelementptr <1 x float>, <1 x float>* %tmp46, i32 0, i32 %tmp47 ; line:58 col:20 + store float %tmp45, float* %tmp48 ; line:58 col:34 + %tmp49 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:58 col:13 + %tmp50 = getelementptr <1 x float>, <1 x float>* %tmp49, i32 0, i32 0 ; line:58 col:5 + store float %tmp45, float* %tmp50 ; line:58 col:18 + %tmp51 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp52 = load float, float* %tmp51, align 4 ; line:59 col:36 + %tmp53 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:59 col:28 + %tmp54 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp55 = getelementptr <2 x float>, <2 x float>* %tmp53, i32 0, i32 %tmp54 ; line:59 col:20 + store float %tmp52, float* %tmp55 ; line:59 col:34 + %tmp56 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:59 col:13 + %tmp57 = getelementptr <2 x float>, <2 x float>* %tmp56, i32 0, i32 1 ; line:59 col:5 + store float %tmp52, float* %tmp57 ; line:59 col:18 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp59 = load float, float* %tmp58, align 4 ; line:61 col:32 + %tmp60 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp61 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp60 ; line:61 col:18 + store float %tmp59, float* %tmp61 ; line:61 col:30 + store float %tmp59, float* getelementptr inbounds (<1 x float>, <1 x float>* @stglob1, i32 0, i32 0) ; line:61 col:16 + %tmp62 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp63 = load float, float* %tmp62, align 4 ; line:62 col:32 + %tmp64 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp65 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp64 ; line:62 col:18 + store float %tmp63, float* %tmp65 ; line:62 col:30 + store float %tmp63, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp66 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp67 = load float, float* %tmp66, align 4 ; line:63 col:37 + %tmp68 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp69 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp68 ; line:63 col:20 + %tmp70 = load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp71 = getelementptr <1 x float>, <1 x float>* %tmp69, i32 
0, i32 %tmp70 ; line:63 col:20 + store float %tmp67, float* %tmp71 ; line:63 col:35 + store float %tmp67, float* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 1, i32 0) ; line:63 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp73 = load float, float* %tmp72, align 4 ; line:64 col:37 + %tmp74 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp75 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp74 ; line:64 col:20 + %tmp76 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp77 = getelementptr <2 x float>, <2 x float>* %tmp75, i32 0, i32 %tmp76 ; line:64 col:20 + store float %tmp73, float* %tmp77 ; line:64 col:35 + store float %tmp73, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp78 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp79 = load float, float* %tmp78, align 4 ; line:65 col:36 + %tmp80 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp81 = getelementptr <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), i32 0, i32 %tmp80 ; line:65 col:20 + store float %tmp79, float* %tmp81 ; line:65 col:34 + store float %tmp79, float* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0, i32 0) ; line:65 col:18 + %tmp82 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp83 = load float, float* %tmp82, align 4 ; line:66 col:36 + %tmp84 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp85 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp84 ; line:66 col:20 + store float %tmp83, float* %tmp85 ; line:66 col:34 + store float %tmp83, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:66 col:18 + br label %bb86 ; line:67 col:3 + +bb86: ; preds = %bb17, %bb + %tmp87 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17 + %tmp88 = extractelement <1 x float> %tmp87, i32 0 ; line:68 col:17 + %tmp89 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp90 = extractelement <2 x float> %tmp89, i32 1 ; line:68 col:27 + %tmp91 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp92 = extractelement <1 x float> %tmp91, i32 0 ; line:68 col:37 + %tmp93 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp94 = extractelement <2 x float> %tmp93, i32 1 ; line:68 col:47 + %tmp95 = insertelement <4 x float> undef, float %tmp88, i64 0 ; line:68 col:16 + %tmp96 = insertelement <4 x float> %tmp95, float %tmp90, i64 1 ; line:68 col:16 + %tmp97 = insertelement <4 x float> %tmp96, float %tmp92, i64 2 ; line:68 col:16 + %tmp98 = insertelement <4 x float> %tmp97, float %tmp94, i64 3 ; line:68 col:16 + %tmp99 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp100 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp99 ; line:68 col:66 + %tmp101 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp102 = getelementptr <1 x float>, <1 x float>* %tmp100, i32 0, i32 %tmp101 ; line:68 col:66 + %tmp103 = load float, float* %tmp102 ; line:68 col:66 + %tmp104 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp105 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* 
%dylar2, i32 0, i32 %tmp104 ; line:68 col:82 + %tmp106 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp107 = getelementptr <2 x float>, <2 x float>* %tmp105, i32 0, i32 %tmp106 ; line:68 col:82 + %tmp108 = load float, float* %tmp107 ; line:68 col:82 + %tmp109 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 0 ; line:68 col:98 + %tmp110 = load <1 x float>, <1 x float>* %tmp109, align 4 ; line:68 col:98 + %tmp111 = extractelement <1 x float> %tmp110, i32 0 ; line:68 col:98 + %tmp112 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp113 = load <2 x float>, <2 x float>* %tmp112, align 4 ; line:68 col:111 + %tmp114 = extractelement <2 x float> %tmp113, i32 1 ; line:68 col:111 + %tmp115 = insertelement <4 x float> undef, float %tmp103, i64 0 ; line:68 col:65 + %tmp116 = insertelement <4 x float> %tmp115, float %tmp108, i64 1 ; line:68 col:65 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp111, i64 2 ; line:68 col:65 + %tmp118 = insertelement <4 x float> %tmp117, float %tmp114, i64 3 ; line:68 col:65 + %tmp119 = fadd <4 x float> %tmp98, %tmp118 ; line:68 col:57 + %tmp120 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp121 = extractelement <1 x float> %tmp120, i32 0 ; line:69 col:10 + %tmp122 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp123 = extractelement <2 x float> %tmp122, i32 1 ; line:69 col:21 + %tmp124 = load <1 x float>, <1 x float>* @stglob1, align 4 ; line:69 col:32 + %tmp125 = extractelement <1 x float> %tmp124, i32 0 ; line:69 col:32 + %tmp126 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp127 = extractelement <2 x float> %tmp126, i32 1 ; line:69 col:43 + %tmp128 = insertelement <4 x float> undef, float %tmp121, i64 0 ; line:69 col:9 + %tmp129 = insertelement <4 x float> %tmp128, float %tmp123, i64 1 ; line:69 col:9 + %tmp130 = insertelement <4 x float> %tmp129, float %tmp125, i64 2 ; line:69 col:9 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp127, i64 3 ; line:69 col:9 + %tmp132 = fadd <4 x float> %tmp119, %tmp131 ; line:68 col:124 + %tmp133 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp134 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp133 ; line:69 col:63 + %tmp135 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp136 = getelementptr <1 x float>, <1 x float>* %tmp134, i32 0, i32 %tmp135 ; line:69 col:63 + %tmp137 = load float, float* %tmp136 ; line:69 col:63 + %tmp138 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp139 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp138 ; line:69 col:79 + %tmp140 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp141 = getelementptr <2 x float>, <2 x float>* %tmp139, i32 0, i32 %tmp140 ; line:69 col:79 + %tmp142 = load float, float* %tmp141 ; line:69 col:79 + %tmp143 = load <1 x float>, <1 x float>* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 0), align 4 ; line:69 col:95 + %tmp144 = extractelement <1 x float> %tmp143, i32 0 ; line:69 col:95 + %tmp145 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp146 = extractelement <2 x float> %tmp145, i32 1 ; line:69 col:108 + %tmp147 = insertelement <4 x float> undef, float %tmp137, i64 0 ; line:69 col:62 + %tmp148 = insertelement <4 x float> %tmp147, float %tmp142, i64 1 ; 
line:69 col:62 + %tmp149 = insertelement <4 x float> %tmp148, float %tmp144, i64 2 ; line:69 col:62 + %tmp150 = insertelement <4 x float> %tmp149, float %tmp146, i64 3 ; line:69 col:62 + %tmp151 = fadd <4 x float> %tmp132, %tmp150 ; line:69 col:54 + %tmp152 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:70 col:20 + %tmp153 = load <1 x float>, <1 x float>* %tmp152, align 4 ; line:70 col:20 + %tmp154 = extractelement <1 x float> %tmp153, i64 0 ; line:70 col:11 + %tmp155 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:70 col:31 + %tmp156 = getelementptr <2 x float>, <2 x float>* %tmp155, i32 0, i32 1 ; line:70 col:23 + %tmp157 = load float, float* %tmp156 ; line:70 col:23 + %tmp158 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:70 col:45 + %tmp159 = load <1 x float>, <1 x float>* %tmp158, align 4 ; line:70 col:45 + %tmp160 = extractelement <1 x float> %tmp159, i64 0 ; line:70 col:11 + %tmp161 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:70 col:56 + %tmp162 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp163 = getelementptr <2 x float>, <2 x float>* %tmp161, i32 0, i32 %tmp162 ; line:70 col:48 + %tmp164 = load float, float* %tmp163 ; line:70 col:48 + %tmp165 = insertelement <4 x float> undef, float %tmp154, i64 0 ; line:70 col:11 + %tmp166 = insertelement <4 x float> %tmp165, float %tmp157, i64 1 ; line:70 col:11 + %tmp167 = insertelement <4 x float> %tmp166, float %tmp160, i64 2 ; line:70 col:11 + %tmp168 = insertelement <4 x float> %tmp167, float %tmp164, i64 3 ; line:70 col:11 + %tmp169 = fadd <4 x float> %tmp151, %tmp168 ; line:69 col:121 + %tmp170 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0), align 4 ; line:70 col:80 + %tmp171 = extractelement <1 x float> %tmp170, i64 0 ; line:70 col:71 + %tmp172 = load float, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:70 col:83 + %tmp173 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), align 4 ; line:70 col:105 + %tmp174 = extractelement <1 x float> %tmp173, i64 0 ; line:70 col:71 + %tmp175 = load i32, i32* %tmp, align 4 ; line:70 col:118 + %tmp176 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp175 ; line:70 col:108 + %tmp177 = load float, float* %tmp176 ; line:70 col:108 + %tmp178 = insertelement <4 x float> undef, float %tmp171, i64 0 ; line:70 col:71 + %tmp179 = insertelement <4 x float> %tmp178, float %tmp172, i64 1 ; line:70 col:71 + %tmp180 = insertelement <4 x float> %tmp179, float %tmp174, i64 2 ; line:70 col:71 + %tmp181 = insertelement <4 x float> %tmp180, float %tmp177, i64 3 ; line:70 col:71 + %tmp182 = fadd <4 x float> %tmp169, %tmp181 ; line:70 col:63 + ret <4 x float> %tmp182 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !10} +!dx.entryPoints = !{!19} +!dx.fnprops = !{} +!dx.options = !{!23, !24} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC1", i32 7, i32 9, i32 
13, i32 1} +!8 = !{i32 8, !9} +!9 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC2", i32 7, i32 9, i32 13, i32 2} +!10 = !{i32 1, <4 x float> (i32, [12 x float]*)* @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z", !11} +!11 = !{!12, !15, !17} +!12 = !{i32 1, !13, !14} +!13 = !{i32 7, i32 9, i32 13, i32 4} +!14 = !{} +!15 = !{i32 0, !16, !14} +!16 = !{i32 4, !"IX", i32 7, i32 4} +!17 = !{i32 0, !18, !14} +!18 = !{i32 4, !"VAL", i32 7, i32 9} +!19 = !{null, !"", null, !20, null} +!20 = !{null, null, !21, null} +!21 = !{!22} +!22 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!23 = !{i32 64} +!24 = !{i32 -1} +!25 = !{!26, !26, i64 0} +!26 = !{!"int", !27, i64 0} +!27 = !{!"omnipotent char", !28, i64 0} +!28 = !{!"Simple C/C++ TBAA"} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl new file mode 100644 index 0000000000..7641cb4f39 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T lib_6_9 %s | FileCheck %s + +// Mainly a source for the ScalarReductionOfAggregatesHLSL(SROA) +// and DynamicIndexingVectorToArray(DIVA) IR tests with native vectors +// using allocas, static globals, and parameters. +// Dynamically accessed 1-element vectors should get skipped by SROA, +// but addressed by DynamicIndexingVectorToArray (hence the name). +// Larger vectors should be untouched. +// Arrays of vectors get some special treatment as well. +// Verifies that the original code is as expected for the IR tests. + +struct VectRec1 { + float1 f : REC1; +}; +struct VectRec2 { + float2 f : REC2; +}; + +// Vec2s will be preserved. +// CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Dynamic vec1s will get replaced with dynamic vector to array. +// CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +// Vec2s will be preserved. +// CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Static vec1s will get replaced with SROA. +// CHECK-DAG: @stglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +static float1 dyglob1; +static float2 dyglob2; +static float1 dygar1[2]; +static float2 dygar2[3]; +static VectRec1 dygrec1; +static VectRec2 dygrec2; + +static float1 stglob1; +static float2 stglob2; +static float1 stgar1[2]; +static float2 stgar2[3]; +static VectRec1 stgrec1; +static VectRec2 stgrec2; + +// Test assignment operators. +// Vec2s should be skipped by SROA and DIVA +// DIVA will lower statically-indexed vectors and vectors in an array. +// CHECK-LABEL: define <4 x float> @"\01?tester +export float4 tester(int ix : IX, float vals[12] : VAL) { + + // Vec2s will be preserved. 
+ // CHECK-DAG: %dyloc2 = alloca <2 x float>, align 4
+ // CHECK-DAG: %dylar2 = alloca [4 x <2 x float>], align 4
+ // CHECK-DAG: %dylorc2 = alloca %struct.VectRec2, align 4
+
+ // Dynamic local vec1s will get replaced with dynamic vector to array.
+ // CHECK-DAG: %dyloc1 = alloca <1 x float>, align 4
+ // CHECK-DAG: %dylar1 = alloca [3 x <1 x float>], align 4
+ // CHECK-DAG: %dylorc1 = alloca %struct.VectRec1, align 4
+
+ // Vec2s will be preserved.
+ // CHECK-DAG: %stloc2 = alloca <2 x float>, align 4
+ // CHECK-DAG: %stlar2 = alloca [4 x <2 x float>], align 4
+ // CHECK-DAG: %stlorc2 = alloca %struct.VectRec2, align 4
+
+ // Static local vec1s will get replaced by various passes.
+ // CHECK-DAG: %stloc1 = alloca <1 x float>, align 4
+ // CHECK-DAG: %stlar1 = alloca [3 x <1 x float>], align 4
+ // CHECK-DAG: %stlorc1 = alloca %struct.VectRec1, align 4
+
+ float1 dyloc1;
+ float2 dyloc2;
+ float1 dylar1[3];
+ float2 dylar2[4];
+ VectRec1 dylorc1;
+ VectRec2 dylorc2;
+
+ float1 stloc1;
+ float2 stloc2;
+ float1 stlar1[3];
+ float2 stlar2[4];
+ VectRec1 stlorc1;
+ VectRec2 stlorc2;
+
+ if (ix > 0) {
+ stloc1[0] = dyloc1[ix] = vals[0];
+ stloc2[1] = dyloc2[ix] = vals[1];
+ stlar1[1][0] = dylar1[ix][ix] = vals[2];
+ stlar2[1][0] = dylar2[ix][ix] = vals[3];
+ stlorc1.f[0] = dylorc1.f[ix] = vals[4];
+ stlorc2.f[1] = dylorc2.f[ix] = vals[5];
+
+ stglob1[0] = dyglob1[ix] = vals[6];
+ stglob2[1] = dyglob2[ix] = vals[7];
+ stgar1[1][0] = dygar1[ix][ix] = vals[8];
+ stgar2[1][1] = dygar2[ix][ix] = vals[9];
+ stgrec1.f[0] = dygrec1.f[ix] = vals[10];
+ stgrec2.f[1] = dygrec2.f[ix] = vals[11];
+ }
+ return float4(dyloc1.x, dyloc2.y, stloc1.x, stloc2.y) + float4(dylar1[ix][ix], dylar2[ix][ix], stlar1[0].x, stlar2[0].y) +
+ float4(dyglob1.x, dyglob2.y, stglob1.x, stglob2.y) + float4(dygar1[ix][ix], dygar2[ix][ix], stgar1[0].x, stgar2[0].y) +
+ float4(stlorc1.f, stlorc2.f[1], dylorc1.f, dylorc2.f[ix]) + float4(stgrec1.f, stgrec2.f[1], dygrec1.f, dygrec2.f[ix]);
+}
+
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
new file mode 100644
index 0000000000..1fe7c17621
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll
@@ -0,0 +1,660 @@
+; RUN: %dxopt %s -scalarizer -S | FileCheck %s
+
+; Vectors of length greater than 1 should get no changes from the scalarizer,
+; so this unusual test verifies that the pass makes no changes at all.
+; It is still justified because, prior to 6.9, many changes would result.
+; Compiled mostly for float7 vectors, with int7 for the integer-specific parts.
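+;
+; For context (illustrative only; nothing below checks this): before native
+; vectors, the scalarizer would have split a vector op such as
+;   %r = fadd fast <7 x float> %a, %b
+; into seven scalar fadds on lanes pulled out with extractelement and then
+; reassembled with insertelement. With SM 6.9 the <7 x float> operations below
+; are expected to survive untouched.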
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer" = type { float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } + +@"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" = external global [7 x %"class.RWStructuredBuffer"], align 4 +@llvm.used = appending global [1 x i8*] [i8* bitcast ([7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" to i8*)], section "llvm.metadata" + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?assignments +define void @"\01?assignments@@YAXY09$$CAV?$vector@M$06@@@Z"([10 x <7 x float>]* noalias %things) #0 { +bb: + %tmp = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* getelementptr inbounds ([7 x %"class.RWStructuredBuffer"], [7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A", i32 0, i32 0) + %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %tmp) + %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + + ; CHECK: [[buf:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 1, i32 0, i8 1, i32 4) + ; CHECK: [[val:%.*]] = extractvalue %dx.types.ResRet.f32 [[buf]], 0 + ; CHECK: [[vec:%.*]] = insertelement <7 x float> undef, float [[val]], i32 0 + ; CHECK: [[res0:%.*]] = shufflevector <7 x float> [[vec]], <7 x float> undef, <7 x i32> zeroinitializer + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: store <7 x float> [[res0]], <7 x float>* [[adr0]], align 4 + %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4) + %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0 + %tmp4 = insertelement <7 x float> undef, float %tmp3, i32 0 + %tmp5 = shufflevector <7 x float> %tmp4, <7 x float> undef, <7 x i32> zeroinitializer + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[res1:%.*]] = fadd fast <7 x float> [[ld1]], [[ld5]] + ; CHECK: store <7 x float> [[res1]], <7 x float>* [[adr1]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + %tmp10 = load <7 x float>, <7 x float>* %tmp9, align 4 + %tmp11 = fadd fast <7 x float> %tmp10, %tmp8 + store <7 x float> %tmp11, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 
+ ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fsub fast <7 x float> [[ld2]], [[ld6]] + ; CHECK: store <7 x float> [[res2]], <7 x float>* [[adr2]], align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + %tmp15 = load <7 x float>, <7 x float>* %tmp14, align 4 + %tmp16 = fsub fast <7 x float> %tmp15, %tmp13 + store <7 x float> %tmp16, <7 x float>* %tmp14, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fmul fast <7 x float> [[ld3]], [[ld7]] + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + %tmp20 = load <7 x float>, <7 x float>* %tmp19, align 4 + %tmp21 = fmul fast <7 x float> %tmp20, %tmp18 + store <7 x float> %tmp21, <7 x float>* %tmp19, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld8]] + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + %tmp25 = load <7 x float>, <7 x float>* %tmp24, align 4 + %tmp26 = fdiv fast <7 x float> %tmp25, %tmp23 + store <7 x float> %tmp26, <7 x float>* %tmp24, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = frem fast <7 x float> [[ld5]], [[ld9]] + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp27 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29, align 4 + %tmp31 = frem fast <7 x float> %tmp30, %tmp28 + store <7 x float> %tmp31, <7 x float>* %tmp29, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$06@@Y0L@$$CAV1@@Z"([11 x <7 x 
float>]* noalias sret %agg.result, [11 x <7 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[res0:%.*]] = fsub fast <7 x float> , [[ld0]] + %tmp = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp1 = load <7 x float>, <7 x float>* %tmp, align 4 + %tmp2 = fsub fast <7 x float> , %tmp1 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + %tmp3 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp4 = load <7 x float>, <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fadd fast <7 x float> [[ld1]], [[ld2]] + %tmp5 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + %tmp6 = load <7 x float>, <7 x float>* %tmp5, align 4 + %tmp7 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = fadd fast <7 x float> %tmp6, %tmp8 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fsub fast <7 x float> [[ld2]], [[ld3]] + %tmp10 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = fsub fast <7 x float> %tmp11, %tmp13 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fmul fast <7 x float> [[ld3]], [[ld4]] + %tmp15 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp16 = load <7 x float>, <7 x float>* %tmp15, align 4 + %tmp17 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = fmul fast <7 x float> %tmp16, %tmp18 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 
x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld5]] + %tmp20 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp21 = load <7 x float>, <7 x float>* %tmp20, align 4 + %tmp22 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = fdiv fast <7 x float> %tmp21, %tmp23 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = frem fast <7 x float> [[ld5]], [[ld6]] + %tmp25 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp26 = load <7 x float>, <7 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = frem fast <7 x float> %tmp26, %tmp28 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = fadd fast <7 x float> [[ld7]], + ; CHECK: store <7 x float> [[res7]], <7 x float>* [[adr7]], align 4 + %tmp30 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7 + %tmp31 = load <7 x float>, <7 x float>* %tmp30, align 4 + %tmp32 = fadd fast <7 x float> %tmp31, + store <7 x float> %tmp32, <7 x float>* %tmp30, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = fadd fast <7 x float> [[ld8]], + ; CHECK: store <7 x float> [[res8]], <7 x float>* [[adr8]], align 4 + %tmp33 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + %tmp34 = load <7 x float>, <7 x float>* %tmp33, align 4 + %tmp35 = fadd fast <7 x float> %tmp34, + store <7 x float> %tmp35, <7 x float>* %tmp33, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[res9:%.*]] = fadd fast <7 x float> [[ld9]], + ; CHECK: store <7 x float> [[res9]], <7 x float>* [[adr9]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + %tmp37 = load <7 x float>, <7 x float>* %tmp36, align 4 + %tmp38 = fadd fast <7 x float> %tmp37, + store <7 x float> %tmp38, <7 x float>* %tmp36, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x float>, <7 x float>* [[adr10]], align 4 + ; CHECK: [[res10:%.*]] = fadd fast <7 x float> [[ld10]], + ; CHECK: store <7 x float> [[res10]], <7 x float>* [[adr10]], align 4 + %tmp39 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + %tmp40 = load <7 x float>, <7 x float>* %tmp39, align 4 + %tmp41 = fadd fast <7 x float> %tmp40, + store <7 x float> %tmp41, <7 x float>* %tmp39, align 4 + + %tmp42 = getelementptr 
inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 0
+ store <7 x float> %tmp2, <7 x float>* %tmp42
+ %tmp43 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 1
+ store <7 x float> %tmp4, <7 x float>* %tmp43
+ %tmp44 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 2
+ store <7 x float> %tmp9, <7 x float>* %tmp44
+ %tmp45 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 3
+ store <7 x float> %tmp14, <7 x float>* %tmp45
+ %tmp46 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 4
+ store <7 x float> %tmp19, <7 x float>* %tmp46
+ %tmp47 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 5
+ store <7 x float> %tmp24, <7 x float>* %tmp47
+ %tmp48 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 6
+ store <7 x float> %tmp29, <7 x float>* %tmp48
+ %tmp49 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 7
+ store <7 x float> %tmp31, <7 x float>* %tmp49
+ %tmp50 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 8
+ store <7 x float> %tmp34, <7 x float>* %tmp50
+ %tmp51 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 9
+ store <7 x float> %tmp38, <7 x float>* %tmp51
+ %tmp52 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 10
+ store <7 x float> %tmp41, <7 x float>* %tmp52
+ ret void
+}
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?logic
+define void @"\01?logic@@YA$$BY09V?$vector@_N$06@@Y09V1@Y09V?$vector@M$06@@@Z"([10 x <7 x i32>]* noalias sret %agg.result, [10 x <7 x i32>]* %truth, [10 x <7 x float>]* %consequences) #0 {
+bb:
+ ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0
+ ; CHECK: [[ld0:%.*]] = load <7 x i32>, <7 x i32>* [[adr0]], align 4
+ ; CHECK: [[nres0:%.*]] = icmp ne <7 x i32> [[ld0]], zeroinitializer
+ ; CHECK: [[bres0:%.*]] = icmp eq <7 x i1> [[nres0]], zeroinitializer
+ ; CHECK: [[res0:%.*]] = zext <7 x i1> [[bres0]] to <7 x i32>
+ %tmp = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0
+ %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4
+ %tmp2 = icmp ne <7 x i32> %tmp1, zeroinitializer
+ %tmp3 = icmp eq <7 x i1> %tmp2, zeroinitializer
+ %tmp4 = zext <7 x i1> %tmp3 to <7 x i32>
+
+ ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1
+ ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4
+ ; CHECK: [[bld1:%.*]] = icmp ne <7 x i32> [[ld1]], zeroinitializer
+ ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+ ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4
+ ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer
+ ; CHECK: [[bres1:%.*]] = or <7 x i1> [[bld1]], [[bld2]]
+ ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32>
+ %tmp5 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1
+ %tmp6 = load <7 x i32>, <7 x i32>* %tmp5, align 4
+ %tmp7 = icmp ne <7 x i32> %tmp6, zeroinitializer
+ %tmp8 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2
+ %tmp9 = load <7 x i32>, <7 x i32>* %tmp8, align 4
+ %tmp10 = icmp ne <7 x i32> %tmp9, zeroinitializer
+
%tmp11 = or <7 x i1> %tmp7, %tmp10 + %tmp12 = zext <7 x i1> %tmp11 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[bres2:%.*]] = and <7 x i1> [[bld2]], [[bld3]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp13 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + %tmp14 = load <7 x i32>, <7 x i32>* %tmp13, align 4 + %tmp15 = icmp ne <7 x i32> %tmp14, zeroinitializer + %tmp16 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = icmp ne <7 x i32> %tmp17, zeroinitializer + %tmp19 = and <7 x i1> %tmp15, %tmp18 + %tmp20 = zext <7 x i1> %tmp19 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[bld4:%.*]] = icmp ne <7 x i32> [[ld4]], zeroinitializer + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[bld5:%.*]] = icmp ne <7 x i32> [[ld5]], zeroinitializer + ; CHECK: [[bres3:%.*]] = select <7 x i1> [[bld3]], <7 x i1> [[bld4]], <7 x i1> [[bld5]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp21 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + %tmp22 = load <7 x i32>, <7 x i32>* %tmp21, align 4 + %tmp23 = icmp ne <7 x i32> %tmp22, zeroinitializer + %tmp24 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = icmp ne <7 x i32> %tmp25, zeroinitializer + %tmp27 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + %tmp28 = load <7 x i32>, <7 x i32>* %tmp27, align 4 + %tmp29 = icmp ne <7 x i32> %tmp28, zeroinitializer + %tmp30 = select <7 x i1> %tmp23, <7 x i1> %tmp26, <7 x i1> %tmp29 + %tmp31 = zext <7 x i1> %tmp30 to <7 x i32> + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[bres1:%.*]] = fcmp fast oeq <7 x float> [[ld0]], [[ld1]] + ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32> + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + %tmp33 = load <7 x float>, <7 x float>* %tmp32, align 4 + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + 
%tmp35 = load <7 x float>, <7 x float>* %tmp34, align 4 + %tmp36 = fcmp fast oeq <7 x float> %tmp33, %tmp35 + %tmp37 = zext <7 x i1> %tmp36 to <7 x i32> + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[bres2:%.*]] = fcmp fast une <7 x float> [[ld1]], [[ld2]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + %tmp39 = load <7 x float>, <7 x float>* %tmp38, align 4 + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp41 = load <7 x float>, <7 x float>* %tmp40, align 4 + %tmp42 = fcmp fast une <7 x float> %tmp39, %tmp41 + %tmp43 = zext <7 x i1> %tmp42 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[bres3:%.*]] = fcmp fast olt <7 x float> [[ld2]], [[ld3]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp44 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp45 = load <7 x float>, <7 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp47 = load <7 x float>, <7 x float>* %tmp46, align 4 + %tmp48 = fcmp fast olt <7 x float> %tmp45, %tmp47 + %tmp49 = zext <7 x i1> %tmp48 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[bres4:%.*]] = fcmp fast ogt <7 x float> [[ld3]], [[ld4]] + ; CHECK: [[res4:%.*]] = zext <7 x i1> [[bres4]] to <7 x i32> + %tmp50 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp51 = load <7 x float>, <7 x float>* %tmp50, align 4 + %tmp52 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + %tmp53 = load <7 x float>, <7 x float>* %tmp52, align 4 + %tmp54 = fcmp fast ogt <7 x float> %tmp51, %tmp53 + %tmp55 = zext <7 x i1> %tmp54 to <7 x i32> + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[bres5:%.*]] = fcmp fast ole <7 x float> [[ld4]], [[ld5]] + ; CHECK: [[res5:%.*]] = zext <7 x i1> [[bres5]] to <7 x i32> + %tmp56 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* 
%consequences, i32 0, i32 4 + %tmp57 = load <7 x float>, <7 x float>* %tmp56, align 4 + %tmp58 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp59 = load <7 x float>, <7 x float>* %tmp58, align 4 + %tmp60 = fcmp fast ole <7 x float> %tmp57, %tmp59 + %tmp61 = zext <7 x i1> %tmp60 to <7 x i32> + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[bres6:%.*]] = fcmp fast oge <7 x float> [[ld5]], [[ld6]] + ; CHECK: [[res6:%.*]] = zext <7 x i1> [[bres6]] to <7 x i32> + %tmp62 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp63 = load <7 x float>, <7 x float>* %tmp62, align 4 + %tmp64 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + %tmp65 = load <7 x float>, <7 x float>* %tmp64, align 4 + %tmp66 = fcmp fast oge <7 x float> %tmp63, %tmp65 + %tmp67 = zext <7 x i1> %tmp66 to <7 x i32> + + %tmp68 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 0 + store <7 x i32> %tmp4, <7 x i32>* %tmp68 + %tmp69 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 1 + store <7 x i32> %tmp12, <7 x i32>* %tmp69 + %tmp70 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 2 + store <7 x i32> %tmp20, <7 x i32>* %tmp70 + %tmp71 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 3 + store <7 x i32> %tmp31, <7 x i32>* %tmp71 + %tmp72 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 4 + store <7 x i32> %tmp37, <7 x i32>* %tmp72 + %tmp73 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 5 + store <7 x i32> %tmp43, <7 x i32>* %tmp73 + %tmp74 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 6 + store <7 x i32> %tmp49, <7 x i32>* %tmp74 + %tmp75 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 7 + store <7 x i32> %tmp55, <7 x i32>* %tmp75 + %tmp76 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 8 + store <7 x i32> %tmp61, <7 x i32>* %tmp76 + %tmp77 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 9 + store <7 x i32> %tmp67, <7 x i32>* %tmp77 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$06@@Y09V1@H@Z"([10 x <7 x float>]* noalias sret %agg.result, [10 x <7 x float>]* %things, i32 %i) #0 { +bb: + %res = alloca [10 x <7 x float>], align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + ; CHECK: store <7 x float> zeroinitializer, <7 x float>* [[adr0]], align 4 + %tmp1 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + store <7 x float> zeroinitializer, <7 x float>* %tmp1, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i + ; CHECK: store <7 x float> , <7 x float>* [[adri]], align 4 + %tmp2 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* 
%res, i32 0, i32 %i + store <7 x float> , <7 x float>* %tmp2, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + ; CHECK: store <7 x float> , <7 x float>* [[adr2]], align 4 + %tmp3 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + store <7 x float> , <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res3:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp4 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + %tmp5 = load <7 x float>, <7 x float>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i + ; CHECK: [[res4:%.*]] = load <7 x float>, <7 x float>* [[adri]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + store <7 x float> %tmp8, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[res5:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp10 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + store <7 x float> %tmp11, <7 x float>* %tmp12, align 4 + + %tmp13 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 0 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + %tmp15 = load <7 x float>, <7 x float>* %tmp14 + store <7 x float> %tmp15, <7 x float>* %tmp13 + + %tmp16 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 1 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 1 + %tmp18 = load <7 x float>, <7 x float>* %tmp17 + store <7 x float> %tmp18, <7 x float>* %tmp16 + + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 2 + %tmp20 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + %tmp21 = load <7 x float>, <7 x float>* %tmp20 + store <7 x float> %tmp21, <7 x float>* %tmp19 + + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + %tmp24 = load <7 x float>, <7 x float>* %tmp23 + store <7 x float> %tmp24, <7 x float>* %tmp22 + 
+ %tmp25 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 4 + %tmp26 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + %tmp27 = load <7 x float>, <7 x float>* %tmp26 + store <7 x float> %tmp27, <7 x float>* %tmp25 + + %tmp28 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 5 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29 + store <7 x float> %tmp30, <7 x float>* %tmp28 + + %tmp31 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 6 + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 6 + %tmp33 = load <7 x float>, <7 x float>* %tmp32 + store <7 x float> %tmp33, <7 x float>* %tmp31 + + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 7 + %tmp35 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 7 + %tmp36 = load <7 x float>, <7 x float>* %tmp35 + store <7 x float> %tmp36, <7 x float>* %tmp34 + + %tmp37 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 8 + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 8 + %tmp39 = load <7 x float>, <7 x float>* %tmp38 + store <7 x float> %tmp39, <7 x float>* %tmp37 + + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 9 + %tmp41 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 9 + %tmp42 = load <7 x float>, <7 x float>* %tmp41 + store <7 x float> %tmp42, <7 x float>* %tmp40 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAV?$vector@I$06@@@Z"([11 x <7 x i32>]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor <7 x i32> [[ld1]], + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + ; CHECK: store <7 x i32> [[res0]], <7 x i32>* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4 + %tmp2 = xor <7 x i32> %tmp1, + %tmp3 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + store <7 x i32> %tmp2, <7 x i32>* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or <7 x i32> [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: store <7 x i32> [[res1]], <7 x i32>* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + %tmp5 = load <7 x i32>, <7 x i32>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp7 = load <7 x i32>, <7 x i32>* %tmp6, align 4 + %tmp8 = or <7 x 
i32> %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + store <7 x i32> %tmp8, <7 x i32>* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and <7 x i32> [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: store <7 x i32> [[res2]], <7 x i32>* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp11 = load <7 x i32>, <7 x i32>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp13 = load <7 x i32>, <7 x i32>* %tmp12, align 4 + %tmp14 = and <7 x i32> %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + store <7 x i32> %tmp14, <7 x i32>* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor <7 x i32> [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: store <7 x i32> [[res3]], <7 x i32>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp19 = load <7 x i32>, <7 x i32>* %tmp18, align 4 + %tmp20 = xor <7 x i32> %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + store <7 x i32> %tmp20, <7 x i32>* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[shv6:%.*]] = and <7 x i32> [[ld6]], + ; CHECK: [[res4:%.*]] = shl <7 x i32> [[ld5]], [[shv6]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: store <7 x i32> [[res4]], <7 x i32>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp23 = load <7 x i32>, <7 x i32>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = and <7 x i32> %tmp25, + %tmp27 = shl <7 x i32> %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + store <7 x i32> %tmp27, <7 x i32>* %tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: 
[[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[shv7:%.*]] = and <7 x i32> [[ld7]], + ; CHECK: [[res5:%.*]] = lshr <7 x i32> [[ld6]], [[shv7]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: store <7 x i32> [[res5]], <7 x i32>* [[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp30 = load <7 x i32>, <7 x i32>* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp32 = load <7 x i32>, <7 x i32>* %tmp31, align 4 + %tmp33 = and <7 x i32> %tmp32, + %tmp34 = lshr <7 x i32> %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + store <7 x i32> %tmp34, <7 x i32>* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or <7 x i32> [[ld6]], [[ld8]] + ; CHECK: store <7 x i32> [[res6]], <7 x i32>* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp37 = load <7 x i32>, <7 x i32>* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp39 = load <7 x i32>, <7 x i32>* %tmp38, align 4 + %tmp40 = or <7 x i32> %tmp39, %tmp37 + store <7 x i32> %tmp40, <7 x i32>* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x i32>, <7 x i32>* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and <7 x i32> [[ld7]], [[ld9]] + ; CHECK: store <7 x i32> [[res7]], <7 x i32>* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + %tmp42 = load <7 x i32>, <7 x i32>* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp44 = load <7 x i32>, <7 x i32>* %tmp43, align 4 + %tmp45 = and <7 x i32> %tmp44, %tmp42 + store <7 x i32> %tmp45, <7 x i32>* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x i32>, <7 x i32>* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor <7 x i32> [[ld8]], [[ld10]] + ; CHECK: store <7 x i32> [[res8]], <7 x i32>* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + %tmp47 = load <7 x i32>, <7 x i32>* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp49 = load <7 x i32>, <7 x i32>* %tmp48, align 4 + %tmp50 = xor <7 x 
i32> %tmp49, %tmp47
+ store <7 x i32> %tmp50, <7 x i32>* %tmp48, align 4
+
+ ret void
+}
+
+declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1
+declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32, %"class.RWStructuredBuffer") #1
+declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
+
+!dx.version = !{!3}
+
+!3 = !{i32 1, i32 9}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
new file mode 100644
index 0000000000..9734b85b12
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll
@@ -0,0 +1,745 @@
+; RUN: %dxopt %s -scalarizer -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%"class.RWStructuredBuffer<vector<float, 1> >" = type { <1 x float> }
+%dx.types.Handle = type { i8* }
+%dx.types.ResourceProperties = type { i32, i32 }
+%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
+
+@"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" = external global %"class.RWStructuredBuffer<vector<float, 1> >", align 4
+@llvm.used = appending global [1 x i8*] [i8* bitcast (%"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" to i8*)], section "llvm.metadata"
+
+; Function Attrs: nounwind
+; CHECK-LABEL: define void @"\01?assignments
+define void @"\01?assignments@@YAXY09$$CAV?$vector@M$00@@@Z"([10 x <1 x float>]* noalias %things) #0 {
+bb:
+ %tmp = load %"class.RWStructuredBuffer<vector<float, 1> >", %"class.RWStructuredBuffer<vector<float, 1> >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A"
+ %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer<vector<float, 1> >"(i32 160, %"class.RWStructuredBuffer<vector<float, 1> >" %tmp)
+ %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 })
+ %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4)
+ %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0
+ %tmp4 = insertelement <1 x float> undef, float %tmp3, i64 0
+ %tmp5 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0
+ store <1 x float> %tmp4, <1 x float>* %tmp5, align 4
+
+ ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+ ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]]
+ ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0
+ ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1
+ ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]]
+ ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0
+ ; CHECK: [[res1:%.*]] = fadd fast float [[val1]], [[val5]]
+ ; CHECK: [[vec1:%.*]] = insertelement <1 x float> undef, float [[res1]], i32 0
+ ; CHECK: store <1 x float> [[vec1]], <1 x float>* [[adr1]], align 4
+ %tmp6 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5
+ %tmp7 = load <1 x float>, <1 x float>* %tmp6, align 4
+ %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1
+ %tmp9 = load <1 x float>, <1 x float>* %tmp8,
align 4 + %tmp10 = fadd fast <1 x float> %tmp9, %tmp7 + store <1 x float> %tmp10, <1 x float>* %tmp8, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[res2:%.*]] = fsub fast float [[val2]], [[val6]] + ; CHECK: [[vec2:%.*]] = insertelement <1 x float> undef, float [[res2]], i32 0 + ; CHECK: store <1 x float> [[vec2]], <1 x float>* [[adr2]], align 4 + %tmp11 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + %tmp12 = load <1 x float>, <1 x float>* %tmp11, align 4 + %tmp13 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = fsub fast <1 x float> %tmp14, %tmp12 + store <1 x float> %tmp15, <1 x float>* %tmp13, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]] + ; CHECK: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[res3:%.*]] = fmul fast float [[val3]], [[val7]] + ; CHECK: [[vec3:%.*]] = insertelement <1 x float> undef, float [[res3]], i32 0 + ; CHECK: store <1 x float> [[vec3]], <1 x float>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + %tmp17 = load <1 x float>, <1 x float>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + %tmp19 = load <1 x float>, <1 x float>* %tmp18, align 4 + %tmp20 = fmul fast <1 x float> %tmp19, %tmp17 + store <1 x float> %tmp20, <1 x float>* %tmp18, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]] + ; CHECK: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[res4:%.*]] = fdiv fast float [[val4]], [[val8]] + ; CHECK: [[vec4:%.*]] = insertelement <1 x float> undef, float [[res4]], i32 0 + ; CHECK: store <1 x float> [[vec4]], <1 x float>* [[adr4]], align 4 + %tmp21 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + %tmp24 = load <1 x float>, <1 x float>* %tmp23, align 4 + %tmp25 = fdiv fast <1 x float> %tmp24, %tmp22 + store <1 x float> %tmp25, <1 x float>* %tmp23, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, 
i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]] + ; CHECK: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[res5:%.*]] = frem fast float [[val5]], [[val9]] + ; CHECK: [[vec5:%.*]] = insertelement <1 x float> undef, float [[res5]], i32 0 + ; CHECK: store <1 x float> [[vec5]], <1 x float>* [[adr5]], align 4 + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9 + %tmp27 = load <1 x float>, <1 x float>* %tmp26, align 4 + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + %tmp29 = load <1 x float>, <1 x float>* %tmp28, align 4 + %tmp30 = frem fast <1 x float> %tmp29, %tmp27 + store <1 x float> %tmp30, <1 x float>* %tmp28, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z"([11 x <1 x float>]* noalias sret %agg.result, [11 x <1 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4 + ; CHECK-DAG: [[zero:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK-DAG: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[sub0:%.*]] = fsub fast float [[zero]], [[val0]] + ; CHECK: [[res0:%.*]] = insertelement <1 x float> undef, float [[sub0]], i32 0 + %tmp = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp1 = load <1 x float>, <1 x float>* %tmp, align 4 + %tmp2 = fsub fast <1 x float> , %tmp1 + %tmp3 = extractelement <1 x float> %tmp2, i64 0 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[res1]], i64 0 + %tmp4 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = extractelement <1 x float> %tmp5, i64 0 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]], align 4 + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[add1:%.*]] = fadd fast float [[val1]], [[val2]] + ; CHECK: [[res1:%.*]] = insertelement <1 x float> undef, float [[add1]], i32 0 + %tmp7 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + %tmp8 = load <1 x float>, <1 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp10 = load <1 x float>, <1 x float>* %tmp9, align 4 + %tmp11 = fadd fast <1 x float> %tmp8, %tmp10 + %tmp12 = extractelement <1 x float> %tmp11, i64 0 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], 
[11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[sub2:%.*]] = fsub fast float [[val2]], [[val3]] + ; CHECK: [[res2:%.*]] = insertelement <1 x float> undef, float [[sub2]], i32 0 + %tmp13 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp16 = load <1 x float>, <1 x float>* %tmp15, align 4 + %tmp17 = fsub fast <1 x float> %tmp14, %tmp16 + %tmp18 = extractelement <1 x float> %tmp17, i64 0 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[mul3:%.*]] = fmul fast float [[val3]], [[val4]] + ; CHECK: [[res3:%.*]] = insertelement <1 x float> undef, float [[mul3]], i32 0 + %tmp19 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp20 = load <1 x float>, <1 x float>* %tmp19, align 4 + %tmp21 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = fmul fast <1 x float> %tmp20, %tmp22 + %tmp24 = extractelement <1 x float> %tmp23, i64 0 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[div4:%.*]] = fdiv fast float [[val4]], [[val5]] + ; CHECK: [[res4:%.*]] = insertelement <1 x float> undef, float [[div4]], i32 0 + %tmp25 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp26 = load <1 x float>, <1 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp28 = load <1 x float>, <1 x float>* %tmp27, align 4 + %tmp29 = fdiv fast <1 x float> %tmp26, %tmp28 + %tmp30 = extractelement <1 x float> %tmp29, i64 0 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]], align 4 + ; CHECK: 
[[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[rem5:%.*]] = frem fast float [[val5]], [[val6]] + ; CHECK: [[res5:%.*]] = insertelement <1 x float> undef, float [[rem5]], i32 0 + %tmp31 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp32 = load <1 x float>, <1 x float>* %tmp31, align 4 + %tmp33 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + %tmp34 = load <1 x float>, <1 x float>* %tmp33, align 4 + %tmp35 = frem fast <1 x float> %tmp32, %tmp34 + %tmp36 = extractelement <1 x float> %tmp35, i64 0 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]], align 4 + ; CHECK-DAG: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add6:%.*]] = fadd fast float [[val7]], [[pos1]] + ; CHECK: [[res6:%.*]] = insertelement <1 x float> undef, float [[add6]], i32 0 + %tmp37 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = fadd fast <1 x float> %tmp38, + store <1 x float> %tmp39, <1 x float>* %tmp37, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]], align 4 + ; CHECK-DAG: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add7:%.*]] = fadd fast float [[val8]], [[neg1]] + ; CHECK: [[res7:%.*]] = insertelement <1 x float> undef, float [[add7]], i32 0 + %tmp40 = extractelement <1 x float> %tmp38, i64 0 + %tmp41 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + %tmp42 = load <1 x float>, <1 x float>* %tmp41, align 4 + %tmp43 = fadd fast <1 x float> %tmp42, + store <1 x float> %tmp43, <1 x float>* %tmp41, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]], align 4 + ; CHECK-DAG: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add8:%.*]] = fadd fast float [[val9]], [[pos1]] + ; CHECK: [[res8:%.*]] = insertelement <1 x float> undef, float [[add8]], i32 0 + %tmp44 = extractelement <1 x float> %tmp42, i64 0 + %tmp45 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + %tmp46 = load <1 x float>, <1 x float>* %tmp45, align 4 + %tmp47 = fadd fast <1 x float> %tmp46, + store <1 x float> %tmp47, <1 x float>* %tmp45, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <1 x float>, <1 x float>* [[adr10]], align 4 + ; CHECK-DAG: [[val10:%.*]] = extractelement <1 x float> [[ld10]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add9:%.*]] = fadd fast float [[val10]], [[neg1]] + ; CHECK: [[res9:%.*]] = insertelement <1 x float> undef, float [[add9]], i32 0 + %tmp48 = extractelement <1 x float> %tmp47, i64 0 + %tmp49 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + %tmp50 = load <1 x float>, <1 x float>* %tmp49, align 4 + %tmp51 
= fadd fast <1 x float> %tmp50, + store <1 x float> %tmp51, <1 x float>* %tmp49, align 4 + + %tmp52 = extractelement <1 x float> %tmp51, i64 0 + %tmp53 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 0 + %insert20 = insertelement <1 x float> undef, float %tmp3, i64 0 + store <1 x float> %insert20, <1 x float>* %tmp53 + %tmp54 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 1 + %insert18 = insertelement <1 x float> undef, float %tmp6, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp54 + %tmp55 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 2 + %insert16 = insertelement <1 x float> undef, float %tmp12, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp55 + %tmp56 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 3 + %insert14 = insertelement <1 x float> undef, float %tmp18, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp56 + %tmp57 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 4 + %insert12 = insertelement <1 x float> undef, float %tmp24, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp57 + %tmp58 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 5 + %insert10 = insertelement <1 x float> undef, float %tmp30, i64 0 + store <1 x float> %insert10, <1 x float>* %tmp58 + %tmp59 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 6 + %insert8 = insertelement <1 x float> undef, float %tmp36, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp59 + %tmp60 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 7 + %insert6 = insertelement <1 x float> undef, float %tmp40, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp60 + %tmp61 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 8 + %insert4 = insertelement <1 x float> undef, float %tmp44, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp61 + %tmp62 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 9 + %insert2 = insertelement <1 x float> undef, float %tmp48, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp62 + %tmp63 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 10 + %insert = insertelement <1 x float> undef, float %tmp52, i64 0 + store <1 x float> %insert, <1 x float>* %tmp63 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?logic +define void @"\01?logic@@YA$$BY09_NY09_NY09V?$vector@M$00@@@Z"([10 x i32]* noalias sret %agg.result, [10 x i32]* %truth, [10 x <1 x float>]* %consequences) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load i32, i32* [[adr0]], align 4 + ; CHECK: [[cmp0:%.*]] = icmp ne i32 [[ld0]], 0 + ; CHECK: [[bres0:%.*]] = xor i1 [[cmp0]], true + ; CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + %tmp = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = icmp ne i32 %tmp1, 0 + %tmp3 = xor i1 %tmp2, true + %tmp4 = zext i1 %tmp3 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4 + ; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 
x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[bres1:%.*]] = or i1 [[cmp1]], [[cmp2]] + ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + %tmp6 = load i32, i32* %tmp5, align 4 + %tmp7 = icmp ne i32 %tmp6, 0 + %tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp10 = load i32, i32* %tmp9, align 4 + %tmp11 = icmp ne i32 %tmp10, 0 + %tmp13 = or i1 %tmp7, %tmp11 + %tmp14 = zext i1 %tmp13 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[bres2:%.*]] = and i1 [[cmp2]], [[cmp3]] + ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = icmp ne i32 %tmp16, 0 + %tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp20 = load i32, i32* %tmp19, align 4 + %tmp21 = icmp ne i32 %tmp20, 0 + %tmp23 = and i1 %tmp17, %tmp21 + %tmp24 = zext i1 %tmp23 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[cmp4:%.*]] = icmp ne i32 [[ld4]], 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[cmp5:%.*]] = icmp ne i32 [[ld5]], 0 + ; CHECK: [[bres3:%.*]] = select i1 [[cmp3]], i1 [[cmp4]], i1 [[cmp5]] + ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + %tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp26 = load i32, i32* %tmp25, align 4 + %tmp27 = icmp ne i32 %tmp26, 0 + %tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = icmp ne i32 %tmp30, 0 + %tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + %tmp33 = load i32, i32* %tmp32, align 4 + %tmp34 = icmp ne i32 %tmp33, 0 + %tmp35 = select i1 %tmp27, i1 %tmp31, i1 %tmp34 + %tmp36 = zext i1 %tmp35 to i32 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[bres4:%.*]] = fcmp fast oeq float [[val0]], [[val1]] + ; CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + %tmp37 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = 
getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp40 = load <1 x float>, <1 x float>* %tmp39, align 4 + %tmp41 = fcmp fast oeq <1 x float> %tmp38, %tmp40 + %tmp42 = extractelement <1 x i1> %tmp41, i64 0 + %tmp43 = zext i1 %tmp42 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[bres5:%.*]] = fcmp fast une float [[val1]], [[val2]] + ; CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + %tmp44 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp45 = load <1 x float>, <1 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp47 = load <1 x float>, <1 x float>* %tmp46, align 4 + %tmp48 = fcmp fast une <1 x float> %tmp45, %tmp47 + %tmp49 = extractelement <1 x i1> %tmp48, i64 0 + %tmp50 = zext i1 %tmp49 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[bres6:%.*]] = fcmp fast olt float [[val2]], [[val3]] + ; CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + %tmp51 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp52 = load <1 x float>, <1 x float>* %tmp51, align 4 + %tmp53 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp54 = load <1 x float>, <1 x float>* %tmp53, align 4 + %tmp55 = fcmp fast olt <1 x float> %tmp52, %tmp54 + %tmp56 = extractelement <1 x i1> %tmp55, i64 0 + %tmp57 = zext i1 %tmp56 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[bres7:%.*]] = fcmp fast ogt float [[val3]], [[val4]] + ; CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + %tmp58 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp59 = load <1 x float>, <1 x float>* %tmp58, align 4 + %tmp60 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp61 = load <1 x float>, <1 x float>* %tmp60, align 4 + %tmp62 = fcmp fast ogt <1 x float> %tmp59, %tmp61 + %tmp63 = extractelement <1 x i1> %tmp62, i64 0 + %tmp64 = zext i1 %tmp63 to i32 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds 
[10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[bres8:%.*]] = fcmp fast ole float [[val4]], [[val5]] + ; CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32 + %tmp65 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp66 = load <1 x float>, <1 x float>* %tmp65, align 4 + %tmp67 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp68 = load <1 x float>, <1 x float>* %tmp67, align 4 + %tmp69 = fcmp fast ole <1 x float> %tmp66, %tmp68 + %tmp70 = extractelement <1 x i1> %tmp69, i64 0 + %tmp71 = zext i1 %tmp70 to i32 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[bres9:%.*]] = fcmp fast oge float [[val5]], [[val6]] + ; CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + %tmp72 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp73 = load <1 x float>, <1 x float>* %tmp72, align 4 + %tmp74 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + %tmp75 = load <1 x float>, <1 x float>* %tmp74, align 4 + %tmp76 = fcmp fast oge <1 x float> %tmp73, %tmp75 + %tmp77 = extractelement <1 x i1> %tmp76, i64 0 + %tmp78 = zext i1 %tmp77 to i32 + + %tmp79 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + store i32 %tmp4, i32* %tmp79 + %tmp80 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + store i32 %tmp14, i32* %tmp80 + %tmp81 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + store i32 %tmp24, i32* %tmp81 + %tmp82 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + store i32 %tmp36, i32* %tmp82 + %tmp83 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + store i32 %tmp43, i32* %tmp83 + %tmp84 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + store i32 %tmp50, i32* %tmp84 + %tmp85 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + store i32 %tmp57, i32* %tmp85 + %tmp86 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + store i32 %tmp64, i32* %tmp86 + %tmp87 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + store i32 %tmp71, i32* %tmp87 + %tmp88 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + store i32 %tmp78, i32* %tmp88 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$00@@Y09V1@H@Z"([10 x <1 x float>]* noalias sret %agg.result, [10 x <1 x float>]* %things, i32 %i) #0 { +bb: + ; CHECK: %res.0 = alloca [10 x float] + %res.0 = alloca 
[10 x float] + + ; CHECK: [[adr0:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + ; CHECK: store float 0.000000e+00, float* [[adr0]] + %tmp1 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + store float 0.000000e+00, float* %tmp1 + + ; CHECK: [[adri:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + ; CHECK: store float 1.000000e+00, float* [[adri]] + %tmp2 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + store float 1.000000e+00, float* %tmp2 + + ; CHECK: [[adr2:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + ; CHECK: store float 2.000000e+00, float* [[adr2]] + %tmp3 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + store float 2.000000e+00, float* %tmp3 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[adr3:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i64 0 + ; CHECK: store float [[val0]], float* [[adr3]] + %tmp4 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %tmp7 = extractelement <1 x float> %tmp5, i64 0 + store float %tmp7, float* %tmp6 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + ; CHECK: [[ldi:%.*]] = load <1 x float>, <1 x float>* [[adri]] + ; CHECK: [[adr4:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + ; CHECK: [[vali:%.*]] = extractelement <1 x float> [[ldi]], i64 0 + ; CHECK: store float [[vali]], float* [[adr4]] + %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4 + %tmp10 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %tmp11 = extractelement <1 x float> %tmp9, i64 0 + store float %tmp11, float* %tmp10 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[adr5:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i64 0 + ; CHECK: store float [[val2]], float* [[adr5]] + %tmp12 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp13 = load <1 x float>, <1 x float>* %tmp12, align 4 + %tmp14 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %tmp15 = extractelement <1 x float> %tmp13, i64 0 + store float %tmp15, float* %tmp14 + + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 0 + %tmp17 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + %load17 = load float, float* %tmp17 + %insert18 = insertelement <1 x float> undef, float %load17, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp16 + + %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 1 + %tmp19 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 1 + %load15 = load float, float* %tmp19 + %insert16 = insertelement <1 x float> undef, float %load15, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp18 + + 
%tmp20 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 2 + %tmp21 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + %load13 = load float, float* %tmp21 + %insert14 = insertelement <1 x float> undef, float %load13, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp20 + + %tmp22 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %load11 = load float, float* %tmp23 + %insert12 = insertelement <1 x float> undef, float %load11, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp22 + + %tmp24 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 4 + %tmp25 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %load9 = load float, float* %tmp25 + %insert10 = insertelement <1 x float> undef, float %load9, i64 0 + store <1 x float> %insert10, <1 x float>* %tmp24 + + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 5 + %tmp27 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %load7 = load float, float* %tmp27 + %insert8 = insertelement <1 x float> undef, float %load7, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp26 + + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 6 + %tmp29 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 6 + %load5 = load float, float* %tmp29 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp28 + + %tmp30 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 7 + %tmp31 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 7 + %load3 = load float, float* %tmp31 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp30 + + %tmp32 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 8 + %tmp33 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 8 + %load1 = load float, float* %tmp33 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp32 + + %tmp34 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 9 + %tmp35 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 9 + %load = load float, float* %tmp35 + %insert = insertelement <1 x float> undef, float %load, i64 0 + store <1 x float> %insert, <1 x float>* %tmp34 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAI@Z"([11 x i32]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor i32 [[ld1]], -1 + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + ; CHECK: store i32 [[res0]], i32* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = xor i32 %tmp1, -1 + %tmp3 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + store i32 %tmp2, i32* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load 
i32, i32* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: store i32 [[res1]], i32* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp7 = load i32, i32* %tmp6, align 4 + %tmp8 = or i32 %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + store i32 %tmp8, i32* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: store i32 [[res2]], i32* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp11 = load i32, i32* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = and i32 %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + store i32 %tmp14, i32* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: store i32 [[res3]], i32* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp17 = load i32, i32* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = xor i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + store i32 %tmp20, i32* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[and4:%.*]] = and i32 [[ld6]], 31 + ; CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[and4]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: store i32 [[res4]], i32* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp23 = load i32, i32* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp25 = load i32, i32* %tmp24, align 4 + %tmp26 = and i32 %tmp25, 31 + %tmp27 = shl i32 %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + store i32 %tmp27, i32* 
%tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[and5:%.*]] = and i32 [[ld7]], 31 + ; CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[and5]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: store i32 [[res5]], i32* [[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp32 = load i32, i32* %tmp31, align 4 + %tmp33 = and i32 %tmp32, 31 + %tmp34 = lshr i32 %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + store i32 %tmp34, i32* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + ; CHECK: store i32 [[res6]], i32* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp37 = load i32, i32* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp39 = load i32, i32* %tmp38, align 4 + %tmp40 = or i32 %tmp39, %tmp37 + store i32 %tmp40, i32* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]] + ; CHECK: store i32 [[res7]], i32* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + %tmp42 = load i32, i32* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp44 = load i32, i32* %tmp43, align 4 + %tmp45 = and i32 %tmp44, %tmp42 + store i32 %tmp45, i32* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]] + ; CHECK: store i32 [[res8]], i32* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + %tmp47 = load i32, i32* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp49 = load i32, i32* %tmp48, align 4 + %tmp50 = xor i32 %tmp49, %tmp47 + store i32 %tmp50, i32* %tmp48, align 4 + + ret void +} + +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #2 +declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32, %"class.RWStructuredBuffer >") #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + 
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
+
+!dx.version = !{!3}
+!3 = !{i32 1, i32 9}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl
new file mode 100644
index 0000000000..66382af2d5
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl
@@ -0,0 +1,425 @@
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Mainly a source for the vec1 scalarizer IR test.
+// Serves to verify some codegen as well.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// Need to capture once for the full vector type, again for the element type.
+// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:<[0-9]* x [a-z0-9_]*>]] }
+// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { <{{[0-9]*}} x [[ELTY:[a-z0-9_]*]]> }
+RWStructuredBuffer<TYPE> buf;
+
+export void assignments(inout TYPE things[10], TYPE scales[10]);
+export TYPE arithmetic(inout TYPE things[11])[11];
+export bool logic(bool truth[10], TYPE consequences[10])[10];
+export TYPE index(TYPE things[10], int i, TYPE val)[10];
+
+// Test assignment operators.
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1) + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] [[TYPE]] [[vec1]], [[vec5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] [[TYPE]] [[vec2]], [[vec6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] [[TYPE]] [[vec3]], [[vec7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[TYPE]] [[vec4]], [[vec8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // NODBL: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[vec5]], [[vec9]] + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res0:%.*]] = [[SUB]] [[TYPE]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 6 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = 
[[ADD]] [[TYPE]] [[vec7]], <[[ELTY]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]]> + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 7 + // CHECK: store [[TYPE]] [[vec7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[vec8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 8 + // CHECK: store [[TYPE]] [[vec8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[vec9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[vec10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[vec10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [11 x [[TYPE]]]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [11 x [[TYPE]]]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], TYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne i32 [[vec0]], 0 + // CHECK: [[bres0:%.*]] = xor i1 [[bvec0]], true + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 0 + // CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + // CHECK: store i32 [[res0]], i32* [[adr0]] + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne i32 [[vec1]], 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[bres1:%.*]] = or i1 [[bvec1]], [[bvec2]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 1 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + // CHECK: store i32 [[res1]], i32* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + // CHECK: store i32 [[res2]], i32* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne i32 [[vec4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne i32 [[vec5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + // CHECK: store i32 [[res3]], i32* [[adr3]] + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[vec0]], [[vec1]] + // CHECK: [[bres4:%.*]] = extractelement <1 x i1> [[cmp4]], i64 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + // CHECK: store i32 [[res4]], i32* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[bres5:%.*]] = extractelement <1 x i1> [[cmp5]], i64 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + // CHECK: store i32 [[res5]], i32* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[bres6:%.*]] = extractelement <1 x i1> [[cmp6]], i64 0 + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + // CHECK: store i32 [[res6]], i32* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[bres7:%.*]] = extractelement <1 x i1> [[cmp7]], i64 0 + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + // CHECK: store i32 [[res7]], i32* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[bres8:%.*]] = extractelement <1 x i1> [[cmp8]], i64 0 + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 8 + // CHECK: 
[[res8:%.*]] = zext i1 [[bres8]] to i32 + // CHECK: store i32 [[res8]], i32* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[vec5]], [[vec6]] + // CHECK: [[bres9:%.*]] = extractelement <1 x i1> [[cmp9]], i64 0 + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + // CHECK: store i32 [[res9]], i32* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [10 x i32]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x i32]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + TYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] zeroinitializer, [[TYPE]]* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 [[i]] + // CHECK: store [[TYPE]] <[[ELTY]] {{(1|1\.0*e\+0*|0xH3C00).*}}>, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] <[[ELTY]] {{(2|2\.0*e\+0*|0xH4000).*}}>, [[TYPE]]* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 [[i]] + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[res4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +// Test bit twiddling operators. 
+// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout uint things[11]) { + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[res1:%.*]] = xor i32 [[ld1]], -1 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + // CHECK: store i32 [[res1]], i32* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + // CHECK: store i32 [[res1]], i32* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[shv6:%.*]] = and i32 [[ld6]], 31 + // CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[shv6]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]] + // CHECK: [[shv7:%.*]] = and i32 [[ld7]], 31 + // CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[shv7]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + // CHECK: store i32 [[res6]], i32* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds 
[11 x i32], [11 x i32]* %things, i32 0, i32 9
+ // CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]]
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7
+ // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]]
+ // CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]]
+ // CHECK: store i32 [[res7]], i32* [[adr7]]
+ things[7] &= things[9];
+
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10
+ // CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]]
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8
+ // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]]
+ // CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]]
+ // CHECK: store i32 [[res8]], i32* [[adr8]]
+ things[8] ^= things[10];
+
+ // CHECK: ret void
+}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
new file mode 100644
index 0000000000..2c2ef01b8a
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl
@@ -0,0 +1,420 @@
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=16 -DDBL %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=34 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=129 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL
+
+// Mainly a source for the longvec scalarizer IR test.
+// Serves to verify some codegen as well.
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:[a-z0-9]*]] }
+// CHECK: external global {{\[}}[[NUM:[0-9]*]] x %"class.RWStructuredBuffer
+RWStructuredBuffer<TYPE> buf[NUM];
+
+
+// Test assignment operators.
+// CHECK-LABEL: define void @"\01?assignments
+export void assignments(inout vector<TYPE, NUM> things[10]) {
+
+ // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1)
+ // CHECK: [[vec0:%.*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[res0]], i32 0
+ // CHECK: [[res0:%.*]] = shufflevector <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[adr0]]
+ things[0] = buf[0].Load(1);
+
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+ // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+ // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+ // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]]
+ things[1] += things[5];
+
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6
+ // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+ things[2] -= things[6];
+
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7
+ // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+ // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+ // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+ things[3] *= things[7];
+
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8
+ // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+ // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+ // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+ things[4] /= things[8];
+
+#ifndef DBL
+ // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+ // NODBL: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+ // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+ // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+ // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]]
+ // NODBL: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]]
+ things[5] %= things[9];
+#endif
+}
+
+// Test arithmetic operators.
+// CHECK-LABEL: define void @"\01?arithmetic
+export vector<TYPE, NUM> arithmetic(inout vector<TYPE, NUM> things[11])[11] {
+ vector<TYPE, NUM> res[11];
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0
+ // CHECK: [[res1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]]
+ // CHECK: [[res0:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]>
+ res[0] = -things[0];
+ res[1] = +things[0];
+
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1
+ // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[res2:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 2
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]]
+ res[2] = things[1] + things[2];
+
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+ // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+ // CHECK: [[res3:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 3
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]]
+ res[3] = things[2] - things[3];
+
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3
+ // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+ // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+ // CHECK: [[res4:%.*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 4
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]]
+ res[4] = things[3] * things[4];
+
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4
+ // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]]
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5
+ // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]]
+ // CHECK: [[res5:%.*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+ // CHECK: 
[[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 6 + // NODBL: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + // This is a post op, so the original value goes into res[]. 
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 8
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr8]]
+ res[8] = things[8]--;
+
+ // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9
+ // CHECK: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]]
+ // CHECK: [[res9:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]]
+ // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 9
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]]
+ res[9] = ++things[9];
+
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10
+ // CHECK: [[vec10:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]]
+ // CHECK: [[res10:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]]
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]]
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 10
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]]
+ res[10] = --things[10];
+
+ // Memcpy res into return value.
+ // CHECK: [[retptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %agg.result to i8*
+ // CHECK: [[resptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %res to i8*
+ // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]]
+ // CHECK: ret void
+ return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021
+// CHECK-LABEL: define void @"\01?logic
+export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[10])[10] {
+ vector<bool, NUM> res[10];
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0
+ // CHECK: [[vec0:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr0]]
+ // CHECK: [[bvec0:%.*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer
+ // CHECK: [[bres0:%.*]] = icmp eq <[[NUM]] x i1> [[bvec0]], zeroinitializer
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 0
+ // CHECK: [[res0:%.*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32>
+ // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[adr0]]
+ res[0] = !truth[0];
+
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1
+ // CHECK: [[vec1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]]
+ // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2
+ // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]]
+ // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer
+ // CHECK: [[val1:%.*]] = icmp ne <[[NUM]] x i1> [[bvec1]], zeroinitializer
+ // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer
+ // CHECK: [[bres1:%.*]] = or <[[NUM]] x i1> [[val1]], [[val2]]
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 1
+ // CHECK: [[res1:%.*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32>
+ // CHECK: store 
<[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer + // CHECK: [[val3:%.*]] = icmp ne <[[NUM]] x i1> [[bvec3]], zeroinitializer + // CHECK: [[bres2:%.*]] = and <[[NUM]] x i1> [[val2]], [[val3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres3:%.*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]] + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[bres4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[bres5:%.*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[bres6:%.*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[bres7:%.*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: 
[[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[bres8:%.*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 8 + // CHECK: [[res8:%.*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[bres9:%.*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x <[[NUM]] x [[TYPE]]>] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + vector res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 [[i]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|1\.0*e\+0*|0xH3C00).*}}, <[[NUM]] x [[TYPE]]>* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|2\.0*e\+0*|0xH4000).*}}, <[[NUM]] x [[TYPE]]>* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 
[[i]]
+ // CHECK: [[thgi:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adri]]
+ // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]]
+ res[4] = things[i];
+
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2
+ // CHECK: [[thg2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]]
+ // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5
+ // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]]
+ res[5] = things[Ix];
+ // CHECK: ret void
+ return res;
+}
+
+// Test bit twiddling operators.
+// INT-LABEL: define void @"\01?bittwiddlers
+export void bittwiddlers(inout vector things[11]) {
+ // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 1
+ // CHECK: [[ld1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]]
+ // CHECK: [[res1:%.*]] = xor <[[NUM]] x i32> [[ld1]], <i32 -1
+ // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 0
+ // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr0]]
+ things[0] = ~things[1];
+
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+ // CHECK: [[ld2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+ // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+ // CHECK: [[res1:%.*]] = or <[[NUM]] x i32> [[ld2]], [[ld3]]
+ // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]]
+ things[1] = things[2] | things[3];
+
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+ // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+ // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]]
+ // CHECK: [[res2:%.*]] = and <[[NUM]] x i32> [[ld3]], [[ld4]]
+ // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2
+ // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]]
+ things[2] = things[3] & things[4];
+
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+ // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]]
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+ // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]]
+ // CHECK: [[res3:%.*]] = xor <[[NUM]] x i32> [[ld4]], [[ld5]]
+ // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3
+ // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]]
+ things[3] = things[4] ^ things[5];
+
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+ // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]]
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+ // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+ // CHECK: [[shv6:%.*]] = and <[[NUM]] x i32> [[ld6]], <i32 31
+ // CHECK: [[res4:%.*]] = shl <[[NUM]] x i32> [[ld5]], [[shv6]]
+ // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4
+ // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]]
+ things[4] = things[5] << things[6];
+
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+ // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7
+ // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]]
+ // CHECK: [[shv7:%.*]] = and <[[NUM]] x i32> [[ld7]], <i32 31
+ // CHECK: [[res5:%.*]] = lshr <[[NUM]] x i32> [[ld6]], [[shv7]]
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5
+ // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]]
+ things[5] = things[6] >> things[7];
+
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8
+ // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]]
+ // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6
+ // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]]
+ // CHECK: [[res6:%.*]] = or <[[NUM]] x i32> [[ld6]], [[ld8]]
+ // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]]
+ things[6] |= things[8];
+
+ // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 9
+ // CHECK: [[ld9:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr9]]
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7
+ // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]]
+ // CHECK: [[res7:%.*]] = and <[[NUM]] x i32> [[ld7]], [[ld9]]
+ // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]]
+ things[7] &= things[9];
+
+ // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 10
+ // CHECK: [[ld10:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr10]]
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8
+ // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]]
+ // CHECK: [[res8:%.*]] = xor <[[NUM]] x i32> [[ld8]], [[ld10]]
+ // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]]
+ things[8] ^= things[10];
+
+ // CHECK: ret void
+}
diff --git a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
index 35fd0d6b1d..d5b0bbb2a7 100644
--- a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
+++ b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll
@@ -30,4 +30,3 @@ entry:
 declare float @"\01?foo@@YAMY02V?$vector@M$02@@@Z"([3 x <3 x float>]*)
 
 attributes #0 = { nounwind }
-
diff --git a/tools/clang/unittests/HLSL/LinkerTest.cpp b/tools/clang/unittests/HLSL/LinkerTest.cpp
index 7cafa0db06..df8bb644e1 100644
--- a/tools/clang/unittests/HLSL/LinkerTest.cpp
+++ b/tools/clang/unittests/HLSL/LinkerTest.cpp
@@ -526,6 +526,11 @@ TEST_F(LinkerTest, RunLinkMatArrayParam) {
Link(L"main", L"ps_6_0", pLinker, {libName, libName2}, {"alloca [24 x float]", "getelementptr [12 x float], [12 x float]*"}, {}); + + Link(L"main", L"ps_6_9", pLinker, {libName, libName2}, + {"alloca [2 x <12 x float>]", + "getelementptr [12 x float], [12 x float]*"}, + {}); } TEST_F(LinkerTest, RunLinkMatParam) { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index fc4c427580..e32ab1915a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1184,6 +1184,37 @@ def populate_llvm_instructions(self): self.add_llvm_instr( "OTHER", 53, "VAArg", "VAArgInst", "vaarg instruction", "", [] ) + + self.add_llvm_instr( + "OTHER", + 54, + "ExtractElement", + "ExtractElementInst", + "extracts from vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 55, + "InsertElement", + "InsertElementInst", + "inserts into vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 56, + "ShuffleVector", + "ShuffleVectorInst", + "Shuffle two vectors", + "", + [], + ) + self.add_llvm_instr( "OTHER", 57, From d8aad78191b3f179601babc3183fd7c98f50df17 Mon Sep 17 00:00:00 2001 From: Iago Calvo Lista Date: Wed, 26 Mar 2025 14:12:44 +0000 Subject: [PATCH 50/88] Add support for KHR_compute_shader_derivatives (#7249) Add support for KHR_compute_shader_derivatives - DirectxShaderCompiler already supports `NV_compute_shader_derivatives` which is functionality identical to `KHR_compute_shader_derivatives` - The KHR extension will be used by default instead of the NV one following the same approach as the RT extension. - We currently explain this in a comment in `tools/clang/lib/SPIRV/FeatureManager.cpp` `FeatureManager::enabledByDefault`. - Check commit introducing RT for more info 04a84f05a54949d2075daec656a6a4c0c6829c43 Fixes #7179 --- docs/SPIR-V.rst | 1 + .../include/clang/SPIRV/FeatureManager.h | 1 + tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 6 ++++ tools/clang/lib/SPIRV/FeatureManager.cpp | 8 +++++ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 4 +++ .../test/CodeGenSPIRV/ddx.compute.khr.hlsl | 29 +++++++++++++++++++ ...ture.calculate.lod.compute.linear.khr.hlsl | 23 +++++++++++++++ 7 files changed, 72 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 9a8150a0e8..899b587492 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -315,6 +315,7 @@ Supported extensions * SPV_KHR_fragment_shader_barycentric * SPV_KHR_physical_storage_buffer * SPV_KHR_vulkan_memory_model +* SPV_KHR_compute_shader_derivatives * SPV_NV_compute_shader_derivatives * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 841708d8d5..8a9755ae79 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -59,6 +59,7 @@ enum class Extension { KHR_physical_storage_buffer, KHR_vulkan_memory_model, NV_compute_shader_derivatives, + KHR_compute_shader_derivatives, KHR_fragment_shader_barycentric, KHR_maximal_reconvergence, KHR_float_controls, diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 50a7ab0905..c2b5acff53 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -852,6 +852,12 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { 
spv::Capability::FragmentShaderShadingRateInterlockEXT, }); + addExtensionAndCapabilitiesIfEnabled( + Extension::KHR_compute_shader_derivatives, + { + spv::Capability::ComputeDerivativeGroupQuadsKHR, + spv::Capability::ComputeDerivativeGroupLinearKHR, + }); addExtensionAndCapabilitiesIfEnabled( Extension::NV_compute_shader_derivatives, { diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index c459f7af0f..a8ee1de000 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -215,6 +215,8 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_physical_storage_buffer", Extension::KHR_physical_storage_buffer) .Case("SPV_KHR_vulkan_memory_model", Extension::KHR_vulkan_memory_model) + .Case("SPV_KHR_compute_shader_derivatives", + Extension::KHR_compute_shader_derivatives) .Case("SPV_NV_compute_shader_derivatives", Extension::NV_compute_shader_derivatives) .Case("SPV_KHR_fragment_shader_barycentric", @@ -283,6 +285,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_physical_storage_buffer"; case Extension::KHR_vulkan_memory_model: return "SPV_KHR_vulkan_memory_model"; + case Extension::KHR_compute_shader_derivatives: + return "SPV_KHR_compute_shader_derivatives"; case Extension::NV_compute_shader_derivatives: return "SPV_NV_compute_shader_derivatives"; case Extension::KHR_fragment_shader_barycentric: @@ -370,6 +374,10 @@ bool FeatureManager::enabledByDefault(Extension ext) { // KHR_ray_tracing and NV_ray_tracing are mutually exclusive so enable only // KHR extension by default case Extension::NV_ray_tracing: + return false; + // KHR_compute_shader_derivatives and NV_compute_shader_derivatives are + // mutually exclusive so enable only KHR extension by default + case Extension::NV_compute_shader_derivatives: return false; // Enabling EXT_demote_to_helper_invocation changes the code generation // behavior for the 'discard' statement. Therefore we will only enable it if diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index e1124999ec..04d1a6d556 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -15049,6 +15049,10 @@ void SpirvEmitter::addDerivativeGroupExecutionMode() { // to 2D quad rules. Using derivative operations in any numthreads // configuration not matching either of these is invalid and will produce an // error. 
+ static_assert(spv::ExecutionMode::DerivativeGroupQuadsNV == + spv::ExecutionMode::DerivativeGroupQuadsKHR); + static_assert(spv::ExecutionMode::DerivativeGroupLinearNV == + spv::ExecutionMode::DerivativeGroupLinearKHR); spv::ExecutionMode em = spv::ExecutionMode::DerivativeGroupQuadsNV; if (numThreads[0] % 4 == 0 && numThreads[1] == 1 && numThreads[2] == 1) { em = spv::ExecutionMode::DerivativeGroupLinearNV; diff --git a/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl new file mode 100644 index 0000000000..9e2246e6a5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl @@ -0,0 +1,29 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s + +// CHECK: OpCapability ComputeDerivativeGroupQuadsKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupQuadsKHR + + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(2,2,1)] +void main(uint3 id : SV_GroupThreadID) +{ + // CHECK: OpDPdx %float %float_0_5 + o[0] = ddx(0.5); + // CHECK: OpDPdxCoarse %float %float_0_5 + o[1] = ddx_coarse(0.5); + // CHECK: OpDPdy %float %float_0_5 + o[2] = ddy(0.5); + // CHECK: OpDPdyCoarse %float %float_0_5 + o[3] = ddy_coarse(0.5); + // CHECK: OpDPdxFine %float %float_0_5 + o[4] = ddx_fine(0.5); + // CHECK: OpDPdyFine %float %float_0_5 + o[5] = ddy_fine(0.5); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl new file mode 100644 index 0000000000..23f52ad4b5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK + +// CHECK: OpCapability ComputeDerivativeGroupLinearKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupLinearKHR + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(16,1,1)] +void main(uint3 id : SV_GroupThreadID) +{ + //CHECK: [[t1:%[0-9]+]] = OpLoad %type_1d_image %t1 + //CHECK-NEXT: [[ss1:%[0-9]+]] = OpLoad %type_sampler %ss + //CHECK-NEXT: [[si1:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1]] [[ss1]] + //CHECK-NEXT: [[query1:%[0-9]+]] = OpImageQueryLod %v2float [[si1]] %float_0_5 + //CHECK-NEXT: {{%[0-9]+}} = OpCompositeExtract %float [[query1]] 0 + o[0] = t1.CalculateLevelOfDetail(ss, 0.5); +} From 31a2f581a9eb48e295c20df9be334981f8951b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Thu, 27 Mar 2025 18:31:34 +0100 Subject: [PATCH 51/88] [SPIR-V] Fix usage of indices in subfunctions (#7242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameters tagged with indices are linked to a builtin. Because their layout is different between HLSL and SPIR-V, there is a common mechanism to handle those 'stage I/O variables'. Usually, a local variable with the correct HLSL layout is created, and when required, the value is copied in and copied out in the entrypoint wrapper. 
Then, a function-scoped pointer is passed to sub-functions.

The issue is that `indices` marks an array which is also shared across invocations, meaning we cannot simply copy-in/copy-out: we are only allowed to write the indices touched by the shader. This required pushing the logic into the assignment-expression handling: when a value is assigned to such a builtin, the layout transformation is done and the builtin is written to.

The remaining issue was how to find the builtin from an assignment: the code assumed the ParmDecl of the entrypoint was the only way to access this variable, but nothing prevents the user from passing this index array to another function. The simple solution is to move this out of the generic map and add a new field which stores the SpirvVariable we created, allowing any HLSL function to access it as soon as the HLSLIndices attribute is found.

Fixes #7009

---------

Signed-off-by: Nathan Gauër
---
 tools/clang/lib/SPIRV/DeclResultIdMapper.cpp | 4 +-
 tools/clang/lib/SPIRV/DeclResultIdMapper.h | 24 +++++++
 tools/clang/lib/SPIRV/SpirvEmitter.cpp | 27 +++++---
 .../meshshading.nv.triangle.indices.out.hlsl | 65 +++++++++++++++++++
 4 files changed, 108 insertions(+), 12 deletions(-)
 create mode 100644 tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl

diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
index fd0fa8a3d0..0358873589 100644
--- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
+++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
@@ -860,7 +860,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl,
 QualType arrayType = astContext.getConstantArrayType(
 type, llvm::APInt(32, arraySize), clang::ArrayType::Normal, 0);
 
- stageVarInstructions[cast(decl)] =
+ msOutIndicesBuiltin =
 getBuiltinVar(builtinID, arrayType, decl->getLocation());
 } else {
 // For NV_mesh_shader, the built type is PrimitiveIndicesNV
@@ -871,7 +871,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl,
 astContext.UnsignedIntTy, llvm::APInt(32, arraySize),
 clang::ArrayType::Normal, 0);
 
- stageVarInstructions[cast(decl)] =
+ msOutIndicesBuiltin =
 getBuiltinVar(builtinID, arrayType, decl->getLocation());
 }
 
diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.h b/tools/clang/lib/SPIRV/DeclResultIdMapper.h
index 80723393ce..6ac17fde9d 100644
--- a/tools/clang/lib/SPIRV/DeclResultIdMapper.h
+++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.h
@@ -559,6 +559,11 @@ class DeclResultIdMapper {
 return value;
 }
 
+ SpirvVariable *getMSOutIndicesBuiltin() {
+ assert(msOutIndicesBuiltin && "Variable usage before decl parsing.");
+ return msOutIndicesBuiltin;
+ }
+
 /// Decorate with spirv intrinsic attributes with lamda function variable
 /// check
 void decorateWithIntrinsicAttrs(
@@ -1014,6 +1019,25 @@ class DeclResultIdMapper {
 /// creating that stage variable, so that we don't need to query them again
 /// for reading and writing.
 llvm::DenseMap stageVarInstructions;
+
+ /// Special case for the Indices builtin:
+ /// - this builtin has a different layout in HLSL & SPIR-V, meaning it
+ /// requires
+ /// the same kind of handling as classic stageVarInstructions:
+ /// -> load into a HLSL compatible tmp
+ /// -> write back into the SPIR-V compatible layout.
+ /// - but the builtin is shared across invocations (not only lanes).
+ /// -> we must only write/read from the indices touched by the shader.
+ /// - the variable can be passed to other functions as a out param + /// -> we cannot copy-in/copy-out because shared across invocations. + /// -> we cannot pass a simple pointer: layout differences between + /// HLSL/SPIR-V. + /// + /// All this means we must keep track of the builtin, and each assignment to + /// this will have to handle the layout differences. The easiest solution is + /// to keep this builtin global to the module if present. + SpirvVariable *msOutIndicesBuiltin = nullptr; + /// Vector of all defined resource variables. llvm::SmallVector resourceVars; /// Mapping from {RW|Append|Consume}StructuredBuffers to their diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 04d1a6d556..579af04ea6 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -8133,17 +8133,21 @@ void SpirvEmitter::assignToMSOutIndices( if (indices.size() > 1) { vecComponent = indices.back(); } - auto *var = declIdMapper.getStageVarInstruction(decl); - const auto *varTypeDecl = astContext.getAsConstantArrayType(decl->getType()); - QualType varType = varTypeDecl->getElementType(); + SpirvVariable *var = declIdMapper.getMSOutIndicesBuiltin(); + uint32_t numVertices = 1; - if (!isVectorType(varType, nullptr, &numVertices)) { - assert(isScalarType(varType)); - } - QualType valueType = value->getAstResultType(); uint32_t numValues = 1; - if (!isVectorType(valueType, nullptr, &numValues)) { - assert(isScalarType(valueType)); + { + const auto *varTypeDecl = + astContext.getAsConstantArrayType(decl->getType()); + QualType varType = varTypeDecl->getElementType(); + if (!isVectorType(varType, nullptr, &numVertices)) { + assert(isScalarType(varType)); + } + QualType valueType = value->getAstResultType(); + if (!isVectorType(valueType, nullptr, &numValues)) { + assert(isScalarType(valueType)); + } } const auto loc = decl->getLocation(); @@ -8190,7 +8194,10 @@ void SpirvEmitter::assignToMSOutIndices( assert(numValues == numVertices); if (extMesh) { // create accesschain for Primitive*IndicesEXT[vertIndex]. - auto *ptr = spvBuilder.createAccessChain(varType, var, vertIndex, loc); + const ConstantArrayType *CAT = + astContext.getAsConstantArrayType(var->getAstResultType()); + auto *ptr = spvBuilder.createAccessChain(CAT->getElementType(), var, + vertIndex, loc); // finally create store for Primitive*IndicesEXT[vertIndex] = value. 
spvBuilder.createStore(ptr, value, loc); } else { diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl new file mode 100644 index 0000000000..05d9d8fb1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl @@ -0,0 +1,65 @@ +// RUN: %dxc -T ms_6_5 -E outie -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ms_6_5 -E innie -fcgl %s -spirv | FileCheck %s + +// CHECK-DAG: [[v4_n05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_n0_5 %float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_0_n05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0 %float_n0_5 %float_0 %float_1 +// CHECK-DAG: [[v3_1_0_0:%[0-9]+]] = OpConstantComposite %v3float %float_1 %float_0 %float_0 +// CHECK-DAG: [[v3_0_1_0:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_1 %float_0 +// CHECK-DAG: [[v3_0_0_1:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_0 %float_1 +// CHECK-DAG: [[u3_0_1_2:%[0-9]+]] = OpConstantComposite %v3uint %uint_0 %uint_1 %uint_2 + +// CHECK-DAG: OpDecorate [[indices:%[0-9]+]] BuiltIn PrimitiveIndicesNV + +struct MeshOutput { + float4 position : SV_Position; + float3 color : COLOR0; +}; + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void innie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + SetMeshOutputCounts(3, 2); + + triangles[0] = uint3(0, 1, 2); +// CHECK: [[off:%[0-9]+]] = OpIMul %uint %uint_0 %uint_3 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[off]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 0 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_1 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 1 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_2 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 2 +// CHECK: OpStore [[ptr]] [[tmp]] + + verts[0].position = float4(-0.5, 0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_0 +// CHECK: OpStore [[ptr]] [[v4_n05_05_0_1]] + verts[0].color = float3(1.0, 0.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_0 +// CHECK: OpStore [[ptr]] [[v3_1_0_0]] + + verts[1].position = float4(0.5, 0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_1 +// CHECK: OpStore [[ptr]] [[v4_05_05_0_1]] + verts[1].color = float3(0.0, 1.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_1 +// CHECK: OpStore [[ptr]] [[v3_0_1_0]] + + verts[2].position = float4(0.0, -0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_2 +// CHECK: OpStore [[ptr]] [[v4_0_n05_0_1]] + verts[2].color = float3(0.0, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_2 +// CHECK: OpStore [[ptr]] [[v3_0_0_1]] + +} + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void outie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + innie(triangles, verts); +} From 0fa207a4cd537f6a47d0570993a5cc4e43482042 
Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:09:03 -0700 Subject: [PATCH 52/88] Update DXIL.rst (#7254) Minor grammar fixes --- docs/DXIL.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index c3baf4e454..a68e31d0a9 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -225,10 +225,10 @@ DXIL uses 32-bit pointers in its representation. Out-of-bounds behavior ---------------------- -Indexable thread-local accesses are done via LLVM pointer and have C-like OOB semantics. -Groupshared accesses are done via LLVM pointer too. The origin of a groupshared pointer must be a single TGSM allocation. -If a groupshared pointer uses in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. -For groupshared pointer from regular GEP, OOB will has same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. +Indexable thread-local accesses are done via LLVM pointers and have C-like OOB semantics. +Groupshared accesses are done via LLVM pointers too. The origin of a groupshared pointer must be a single TGSM allocation. +If a groupshared pointer uses an in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. +For a groupshared pointer from regular GEP, OOB will have the same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. Resource accesses keeps the same out-of-bounds behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. @@ -3294,9 +3294,9 @@ Modules and Linking =================== HLSL has linking capabilities to enable third-party libraries. The linking step happens before shader DXIL is given to the driver compilers. -Experimental library generation is added in DXIL1.1. A library could be created by compile with lib_6_1 profile. -A library is a dxil container like the compile result of other shader profiles. The difference is library will keep information for linking like resource link info and entry function signatures. -Library support is not part of DXIL spec. Only requirement is linked shader must be valid DXIL. +Experimental library generation is added in DXIL1.1. A library could be created by compiling with the lib_6_1 profile. +A library is a dxil container like the compile result of other shader profiles. The difference is a library will keep information for linking like resource link info and entry function signatures. +Library support is not part of the DXIL spec. The only requirement is that the linked shader must be valid DXIL. Additional Notes From b7b532b145b7d40a2b4e44104f60040a97f5a13b Mon Sep 17 00:00:00 2001 From: Cassandra Beckley Date: Thu, 27 Mar 2025 11:58:44 -0700 Subject: [PATCH 53/88] [SPIR-V] Update submodules (#7269) spirv-val has added a validation that enforces a minimum version of SPIR-V 1.3 when using VulkanMemoryModel, so I've updated the tests that use it to use the correct target environment. Needed for #7266. 
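
For illustration only (not part of this patch's diff; the flags are taken from the test changes below), the updated pattern looks roughly like:

  // RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv

Without -fspv-target-env=vulkan1.1 (or newer), the module targets a SPIR-V version below 1.3 and the stricter spirv-val check rejects the VulkanMemoryModel capability.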
--- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl | 2 +- .../test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 54a521dd13..0e71067798 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 54a521dd130ae1b2f38fef79b09515702d135bdd +Subproject commit 0e710677989b4326ac974fd80c5308191ed80965 diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index ada1771a9f..393d5c7df1 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit ada1771a9f7a125573aa94fe551fdc44b45769bd +Subproject commit 393d5c7df150532045c50affffea2df22e8231b0 diff --git a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl index a8578f7377..5815981057 100644 --- a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl +++ b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=VULKAN // When the GLSL450 memory model is used, there should be no memory operands on the loads and stores. // When the Vulkan memory model is used, there should be no decorations. There should be memory operands on the loads and stores instead. diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl index e9a1813f31..a0b2ab7207 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN RWTexture1D g_tTex1di1; RWTexture1D g_tTex1du1; From eb169591adbc1403f09fb769d5c8f98e929e6f62 Mon Sep 17 00:00:00 2001 From: raoanag <127366241+raoanag@users.noreply.github.com> Date: Thu, 27 Mar 2025 12:07:17 -0700 Subject: [PATCH 54/88] Update print statements to be compatible with Python 3 (#7268) Update the print statements to be compatible with Python 3 --- tools/clang/utils/check_cfc/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/clang/utils/check_cfc/setup.py b/tools/clang/utils/check_cfc/setup.py index b5fc473639..7405513f0a 100644 --- a/tools/clang/utils/check_cfc/setup.py +++ b/tools/clang/utils/check_cfc/setup.py @@ -8,10 +8,10 @@ import platform import sys if platform.system() == 'Windows': - print "Could not find py2exe. Please install then run setup.py py2exe." + print("Could not find py2exe. Please install then run setup.py py2exe.") raise else: - print "setup.py only required on Windows." 
+ print("setup.py only required on Windows.") sys.exit(1) setup( From 5ff9cbc7cb83ab2d5f52255412f61ac3226c4a08 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 27 Mar 2025 16:40:51 -0700 Subject: [PATCH 55/88] [Sema] Add and test new Subobject Attribute (#7258) This PR adds and tests a new subobject attribute. It will be useful for checking if a given decl is a subobject decl. This functionality will be used in https://github.com/microsoft/DirectXShaderCompiler/pull/7239 We need an attribute in order to determine whether to check its initializer for availability attributes or not. Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7257 --- tools/clang/include/clang/Basic/Attr.td | 7 + tools/clang/lib/AST/HlslTypes.cpp | 64 ++------- tools/clang/lib/Sema/SemaHLSL.cpp | 39 +++-- .../test/SemaHLSL/subobjects-ast-dump.hlsl | 136 ++++++++++++++++++ 4 files changed, 181 insertions(+), 65 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 48193f7077..7a009aa7e1 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -1157,6 +1157,13 @@ def HLSLRayQueryObject : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLSubObject : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; + let Args = [UnsignedArgument<"SubObjKindUint">, UnsignedArgument<"HitGroupType">]; +} + // HLSL HitObject Attribute def HLSLHitObject : InheritableAttr { diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 8f9460ce63..eaf8273413 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -684,64 +684,20 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKind, DXIL::HitGroupType &hgType) { - hgType = (DXIL::HitGroupType)(-1); type = type.getCanonicalType(); if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - switch (name.size()) { - case 17: - return name == "StateObjectConfig" - ? (subobjectKind = DXIL::SubobjectKind::StateObjectConfig, - true) - : false; - case 18: - return name == "LocalRootSignature" - ? (subobjectKind = DXIL::SubobjectKind::LocalRootSignature, - true) - : false; - case 19: - return name == "GlobalRootSignature" - ? (subobjectKind = DXIL::SubobjectKind::GlobalRootSignature, - true) - : false; - case 29: - return name == "SubobjectToExportsAssociation" - ? (subobjectKind = - DXIL::SubobjectKind::SubobjectToExportsAssociation, - true) - : false; - case 22: - return name == "RaytracingShaderConfig" - ? (subobjectKind = DXIL::SubobjectKind::RaytracingShaderConfig, - true) - : false; - case 24: - return name == "RaytracingPipelineConfig" - ? (subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig, - true) - : false; - case 25: - return name == "RaytracingPipelineConfig1" - ? 
(subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig1, - true) - : false; - case 16: - if (name == "TriangleHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::Triangle; - return true; - } - return false; - case 27: - if (name == "ProceduralPrimitiveHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::ProceduralPrimitive; - return true; - } + RecordDecl *RD = RT->getDecl(); + if (!RD->hasAttr()) { return false; } + + HLSLSubObjectAttr *Attr = RD->getAttr(); + subobjectKind = static_cast(Attr->getSubObjKindUint()); + hgType = static_cast(Attr->getHitGroupType()); + if (subobjectKind == DXIL::SubobjectKind::HitGroup) + DXASSERT(DXIL::IsValidHitGroupType(hgType), "invalid hit group type"); + + return true; } return false; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 243471bc55..d20daa0ac0 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -2785,13 +2785,17 @@ AddBuiltInTriangleIntersectionAttributes(ASTContext &context, // // Subobjects -static CXXRecordDecl *StartSubobjectDecl(ASTContext &context, - const char *name) { +static CXXRecordDecl * +StartSubobjectDecl(ASTContext &context, const char *name, + DXIL::SubobjectKind Kind, + DXIL::HitGroupType HGT = DXIL::HitGroupType::LastEntry) { IdentifierInfo &id = context.Idents.get(StringRef(name), tok::TokenKind::identifier); CXXRecordDecl *decl = CXXRecordDecl::Create( context, TagTypeKind::TTK_Struct, context.getTranslationUnitDecl(), NoLoc, NoLoc, &id, nullptr, DelayTypeCreationTrue); + decl->addAttr(HLSLSubObjectAttr::CreateImplicit( + context, static_cast(Kind), static_cast(HGT))); decl->addAttr(FinalAttr::CreateImplicit(context, FinalAttr::Keyword_final)); decl->startDefinition(); return decl; @@ -2808,7 +2812,8 @@ void FinishSubobjectDecl(ASTContext &context, CXXRecordDecl *decl) { // uint32_t Flags; // }; static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "StateObjectConfig"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "StateObjectConfig", DXIL::SubobjectKind::StateObjectConfig); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2822,7 +2827,10 @@ static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, bool global) { CXXRecordDecl *decl = StartSubobjectDecl( - context, global ? "GlobalRootSignature" : "LocalRootSignature"); + context, global ? "GlobalRootSignature" : "LocalRootSignature", + global ? 
DXIL::SubobjectKind::GlobalRootSignature + : DXIL::SubobjectKind::LocalRootSignature); + CreateSimpleField(context, decl, "Data", context.HLSLStringTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2837,7 +2845,8 @@ static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, static CXXRecordDecl * CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "SubobjectToExportsAssociation"); + StartSubobjectDecl(context, "SubobjectToExportsAssociation", + DXIL::SubobjectKind::SubobjectToExportsAssociation); CreateSimpleField(context, decl, "Subobject", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Exports", context.HLSLStringTy, @@ -2853,7 +2862,9 @@ CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingShaderConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingShaderConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingShaderConfig", + DXIL::SubobjectKind::RaytracingShaderConfig); CreateSimpleField(context, decl, "MaxPayloadSizeInBytes", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "MaxAttributeSizeInBytes", @@ -2868,7 +2879,9 @@ CreateSubobjectRaytracingShaderConfig(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingPipelineConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingPipelineConfig", + DXIL::SubobjectKind::RaytracingPipelineConfig); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2883,7 +2896,8 @@ CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { static CXXRecordDecl * CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "RaytracingPipelineConfig1"); + StartSubobjectDecl(context, "RaytracingPipelineConfig1", + DXIL::SubobjectKind::RaytracingPipelineConfig1); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, @@ -2898,7 +2912,9 @@ CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { // string ClosestHit; // }; static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup", + DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::Triangle); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", context.HLSLStringTy, @@ -2915,8 +2931,9 @@ static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectProceduralPrimitiveHitGroup(ASTContext &context) { - CXXRecordDecl *decl = - StartSubobjectDecl(context, "ProceduralPrimitiveHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "ProceduralPrimitiveHitGroup", DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::ProceduralPrimitive); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", 
context.HLSLStringTy, diff --git a/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl new file mode 100644 index 0000000000..6133847fb8 --- /dev/null +++ b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl @@ -0,0 +1,136 @@ +// RUN: %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// The HLSL source is just a copy of +// tools\clang\test\HLSLFileCheck\shader_targets\raytracing\subobjects_raytracingPipelineConfig1.hlsl + +// This test tests that the HLSLSubObjectAttr attribute is present on all +// HLSL subobjects, and tests the ast representation of subobjects + +// ASTIMPL: CXXRecordDecl 0x{{.+}} <> implicit referenced struct StateObjectConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 0 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct GlobalRootSignature definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 1 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct LocalRootSignature definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 2 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct SubobjectToExportsAssociation definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 8 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Subobject 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Exports 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingShaderConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 9 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxPayloadSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxAttributeSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit struct RaytracingPipelineConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 10 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct TriangleHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 0 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct ProceduralPrimitiveHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 1 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Intersection 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingPipelineConfig1 definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} 
<> Implicit 12 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' + +// AST: VarDecl 0x{{.+}} grs 'GlobalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'GlobalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "CBV(b0)" +// AST-NEXT: VarDecl 0x{{.+}} soc 'StateObjectConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'StateObjectConfig' +// AST-NEXT: BinaryOperator 0x{{.+}} 'unsigned int' '|' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS' 'const unsigned int' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} lrs 'LocalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'LocalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)" +// AST-NEXT: VarDecl 0x{{.+}} sea 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a;b;foo;c" +// AST-NEXT: VarDecl 0x{{.+}} sea2 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} sea3 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "" +// AST-NEXT: VarDecl 0x{{.+}} rsc 'RaytracingShaderConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingShaderConfig' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 128 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 64 +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} sea4 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: 
ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "rpc" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} rpc2 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_NONE' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} trHitGt 'TriangleHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'TriangleHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: VarDecl 0x{{.+}} ppHitGt 'ProceduralPrimitiveHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'ProceduralPrimitiveHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "c" + +GlobalRootSignature grs = {"CBV(b0)"}; +StateObjectConfig soc = { STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS | STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS }; +LocalRootSignature lrs = {"UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)"}; +SubobjectToExportsAssociation sea = { "grs", "a;b;foo;c" }; +// Empty association is well-defined: it creates a default association +SubobjectToExportsAssociation sea2 = { "grs", ";" }; +SubobjectToExportsAssociation sea3 = { "grs", "" }; +RaytracingShaderConfig rsc = { 128, 64 }; +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES }; +SubobjectToExportsAssociation sea4 = {"rpc", ";"}; +RaytracingPipelineConfig1 rpc2 = {32, RAYTRACING_PIPELINE_FLAG_NONE }; +TriangleHitGroup trHitGt = {"a", "b"}; +ProceduralPrimitiveHitGroup ppHitGt = { "a", "b", "c"}; From 206b77577d15fc5798eb7ad52290388539b7146d Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 28 Mar 2025 15:37:35 -0700 Subject: [PATCH 56/88] [OMM] Add D3D Flag RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS, and run d3dreflect tests (#7239) This PR adds a new flag, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS, according to the spec. It can be used with `RaytracingPipelineConfig1` subobjects. We expect this new flag to be represented in the output. Additionally, d3dreflect tests are run to ensure that when a rayquery object is using the new OMM enablement flags, that the minimum shader model target is 6.9. 
Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7190 --- tools/clang/lib/AST/ASTContextHLSL.cpp | 4 +++ tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 32 +++++++++++++++---- .../d3dreflect/raytracingpipelineconfig1.hlsl | 19 +++++++++++ .../raytracingpipelineconfig1-warnings.hlsl | 6 ++++ .../tools/dxcompiler/dxcdisassembler.cpp | 2 ++ 5 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl create mode 100644 tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index dcd3e89e9a..3748f8f8f8 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -699,6 +699,10 @@ void hlsl::AddRaytracingConstants(ASTContext &context) { AddConstUInt( context, StringRef("RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"), (unsigned)DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives); + AddConstUInt(context, context.getTranslationUnitDecl(), + StringRef("RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"), + (unsigned)DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)); } /// Adds all constants and enums for sampler feedback diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index ed727af149..c562ee8d52 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -301,7 +301,8 @@ std::vector GetAllExportedFDecls(clang::Sema *self) { } void GatherGlobalsWithInitializers( - DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit) { + DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit, + llvm::SmallVectorImpl &SubObjects) { for (auto *D : DC->decls()) { // Skip built-ins and function decls. if (D->isImplicit() || isa(D)) @@ -310,11 +311,19 @@ void GatherGlobalsWithInitializers( // Add if user-defined static or groupshared global with initializer. if (VD->hasInit() && VD->hasGlobalStorage() && (VD->getStorageClass() == SC_Static || - VD->hasAttr())) + VD->hasAttr())) { + // Place subobjects in a separate collection. + if (const RecordType *RT = VD->getType()->getAs()) { + if (RT->getDecl()->hasAttr()) { + SubObjects.push_back(VD); + continue; + } + } GlobalsWithInit.push_back(VD); + } } else if (auto *DC = dyn_cast(D)) { // Recurse into DeclContexts like namespace, cbuffer, class/struct, etc. 
- GatherGlobalsWithInitializers(DC, GlobalsWithInit); + GatherGlobalsWithInitializers(DC, GlobalsWithInit, SubObjects); } } } @@ -592,14 +601,24 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { hlsl::ShaderModel::GetByName(self->getLangOpts().HLSLProfile.c_str()); llvm::SmallVector GlobalsWithInit; - GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), - GlobalsWithInit); - + llvm::SmallVector SubObjects; std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; llvm::SmallPtrSet DeclAvailabilityChecked; llvm::SmallSet DiagnosedTypeLocs; + GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), + GlobalsWithInit, SubObjects); + + if (shaderModel->GetKind() == DXIL::ShaderKind::Library) { + DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, shaderModel->GetKind(), NodeLaunchTy, nullptr, + DiagnosedCalls, DeclAvailabilityChecked, DiagnosedTypeLocs); + for (VarDecl *VD : SubObjects) + Visitor.TraverseDecl(VD); + } + // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; @@ -705,7 +724,6 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << PatchConstantFunctionReturnIdx; } } - DXIL::ShaderKind EntrySK = shaderModel->GetKind(); DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; if (EntrySK == DXIL::ShaderKind::Library) { diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl new file mode 100644 index 0000000000..44424f5d14 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s + + +// CHECK: ; RaytracingPipelineConfig1 rpc = { MaxTraceRecursionDepth = 32, Flags = RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +// AST: TranslationUnitDecl 0x{{.+}} <> +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' 'const unsigned int' +// ASTIMPL: VarDecl 0x{{.+}} <> implicit referenced RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// ASTIMPL-NEXT: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// ASTIMPL-NEXT: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl new file mode 100644 index 0000000000..c220f5734d --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl @@ -0,0 +1,6 @@ +// RUN: %dxc -T lib_6_8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_8; introduced in shader model 6.9}} 
+RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + + diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp index 01f4973fbe..3af305d52a 100644 --- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp @@ -671,6 +671,8 @@ static const char *FlagToString(DXIL::RaytracingPipelineFlags Flag) { return "RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES"; case DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives: return "RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"; + case DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps: + return "RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"; } return ""; } From 3035d316c35289b68e8fc9d8cf21d86a204fb0e2 Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 1 Apr 2025 12:10:06 -0500 Subject: [PATCH 57/88] Require CMake 3.17, remove CMP0051 (#7287) Hopefully this works and gets us able to build with CMake 4+. --- CMakeLists.txt | 9 --------- tools/clang/CMakeLists.txt | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 74244c1d58..0977fa1246 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,15 +17,6 @@ if(POLICY CMP0022) cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required endif() -if (POLICY CMP0051) - # CMake 3.1 and higher include generator expressions of the form - # $ in the SOURCES property. These need to be - # stripped everywhere that access the SOURCES property, so we just - # defer to the OLD behavior of not including generator expressions - # in the output for now. - cmake_policy(SET CMP0051 OLD) -endif() - if(CMAKE_VERSION VERSION_LESS 3.1.20141117) set(cmake_3_2_USES_TERMINAL) else() diff --git a/tools/clang/CMakeLists.txt b/tools/clang/CMakeLists.txt index 71190336ca..449e6c28b4 100644 --- a/tools/clang/CMakeLists.txt +++ b/tools/clang/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8) +cmake_minimum_required(VERSION 3.17.2) # HLSL Change - Require CMake 3.17.2. # FIXME: It may be removed when we use 2.8.12. if(CMAKE_VERSION VERSION_LESS 2.8.12) From 30bfd82296a04f8302c949d79387b06fc37a31c6 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 1 Apr 2025 11:56:18 -0700 Subject: [PATCH 58/88] NFC: Infrastructure changes for DXIL op vector and multi-dim overloads (#7259) This change adds vector and multi-dimensional overload support for DXIL operations. Multi-dimensional (or "extended") overloads are added, where two or more types in a DXIL Op function signature may vary independently, such as both the return type and a parameter type. Until now, only one overload dimension has been necessary. For single-dim overloads, any number of parameters in a DXIL op may refer to this single overload type. For multi-dim overloads, each type that can vary must have a unique overload dimension, even when two or more types must be the same. This follows a pattern from llvm intrinsics. If two or more of the types need to be the same, this constraint must be handled manually, outside the automatic overload constraints defined by the DXIL op definitions. Vector overloads are also added, requiring an additional set of scalar overload types to define the allowed vector element types, on top of the original set describing the allowed scalar overloads for an operation, since both scalar and vector overloads may be allowed on the same operation. 
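To make the "extended" overload representation concrete: as described in the updated `DxilOperations.h` comments further down, a multi-dimensional overload is carried around as an unnamed (literal) LLVM struct type wrapping one type per overload dimension, e.g. `type { i32, <2 x float> }`. The snippet below is a minimal standalone sketch of that idea using plain LLVM APIs (the LLVM 3.7-era signatures used by this codebase); it is not the DXC helper itself, and the names `Dim0`/`Dim1` are illustrative only.

```cpp
// Minimal sketch (not DXC code): build an unnamed "extended overload"
// struct type wrapping one type per overload dimension, e.g.
// { i32, <2 x float> }, using plain LLVM APIs.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;

  // One type per overload dimension: dimension 0 is a scalar i32,
  // dimension 1 is a 2-element float vector.
  // (VectorType::get(Type*, unsigned) is the LLVM 3.7-era API used by DXC.)
  llvm::Type *Dim0 = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *Dim1 = llvm::VectorType::get(llvm::Type::getFloatTy(Ctx), 2);

  // An unnamed (literal) struct keeps the dimensions together as a single
  // llvm::Type*, which lets multi-dim overloads flow through interfaces
  // that expect one overload type.
  llvm::Type *Members[] = {Dim0, Dim1};
  llvm::StructType *Extended = llvm::StructType::get(Ctx, Members);

  Extended->print(llvm::errs()); // prints: { i32, <2 x float> }
  llvm::errs() << "\n";
  return 0;
}
```

In the patch itself, `OP::GetExtendedOverloadType` builds this wrapper from the per-dimension types, and `GetTypeSlot` classifies such an unnamed struct as `TS_Extended`.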
There are several components involved in handling DXIL operation overloads, with some changes: - DXIL Op definitions in `hctdb.py` use a string of characters to define the allowed overloads, and special type names used in parameter definitions that refer to the overload type. - Overload string syntax updated and more heavily validated. - `','` may separate dimensions for multi-dim overloads - `'<'` indicates that a vector overload is allowed, in which case, scalar components on the left indicate normal scalar overloads allowed, and scalar components on the right indicate the allowed vector element overloads. - If scalar overloads are present to the left, and omitted to the right, the scalar components are replicated to the right automatically. For instance: `"hf<"` is equivalent to `"hf<hf"`. This makes it compatible with all these existing mechanisms without requiring an API overhaul impacting the broader code base. `GetExtendedOverloadType` is used to construct this type from multiple types. While updating `DxilOperations.h|cpp`, I noticed and removed some unused methods: `IsDxilOpTypeName`, `IsDxilOpType`, `IsDupDxilOpType`, `GetOriginalDxilOpType`. --------- Co-authored-by: Greg Roth --- include/dxc/DXIL/DxilConstants.h | 5 + include/dxc/DXIL/DxilOperations.h | 103 +- lib/DXIL/DxilOperations.cpp | 5884 ++++++++++++------------- lib/DxilValidation/DxilValidation.cpp | 17 +- utils/hct/hctdb.py | 170 +- utils/hct/hctdb_instrhelp.py | 142 +- 6 files changed, 3162 insertions(+), 3159 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 0a9c6a4ffd..447728300b 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -155,6 +155,11 @@ const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +/* hctdb_instrhelp.get_max_oload_dims()*/ +// OLOAD_DIMS-TEXT:BEGIN +const unsigned kDxilMaxOloadDims = 2; +// OLOAD_DIMS-TEXT:END + enum class ComponentType : uint32_t { Invalid = 0, I1, diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index e522e06204..05021ce789 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -57,12 +57,31 @@ class OP { // caches. void RefreshCache(); + + // The single llvm::Type * "OverloadType" has one of these forms: + // No overloads (NumOverloadDims == 0): + // - TS_Void: VoidTy + // For single overload dimension (NumOverloadDims == 1): + // - TS_F*, TS_I*: a scalar numeric type (half, float, i1, i64, etc.), + // - TS_UDT: a pointer to a StructType representing a User Defined Type, + // - TS_Object: a named StructType representing a built-in object, or + // - TS_Vector: a vector type (<4 x float>, <16 x i16>, etc.) + // For multiple overload dimensions (TS_Extended, NumOverloadDims > 1): + // - an unnamed StructType containing each type for the corresponding + // dimension, such as: type { i32, <2 x float> } + // - contained type options are the same as for single dimension. 
+ llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType); + + // N-dimension convenience version of GetOpFunc: + llvm::Function *GetOpFunc(OpCode OpCode, + llvm::ArrayRef OverloadTypes); + const llvm::SmallMapVector & GetOpFuncList(OpCode OpCode) const; bool IsDxilOpUsed(OpCode opcode) const; void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } + llvm::Module *GetModule() { return m_pModule; } llvm::Type *GetHandleType() const; llvm::Type *GetHitObjectType() const; llvm::Type *GetNodeHandleType() const; @@ -81,9 +100,14 @@ class OP { llvm::Type *GetResRetType(llvm::Type *pOverloadType); llvm::Type *GetCBufferRetType(llvm::Type *pOverloadType); - llvm::Type *GetVectorType(unsigned numElements, llvm::Type *pOverloadType); + llvm::Type *GetStructVectorType(unsigned numElements, + llvm::Type *pOverloadType); bool IsResRetType(llvm::Type *Ty); + // Construct an unnamed struct type containing the set of member types. + llvm::StructType * + GetExtendedOverloadType(llvm::ArrayRef OverloadTypes); + // Try to get the opcode class for a function. // Return true and set `opClass` if the given function is a dxil function. // Return false if the given function is not a dxil function. @@ -128,11 +152,6 @@ class OP { static bool BarrierRequiresGroup(const llvm::CallInst *CI); static bool BarrierRequiresNode(const llvm::CallInst *CI); static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI); - static bool IsDxilOpTypeName(llvm::StringRef name); - static bool IsDxilOpType(llvm::StructType *ST); - static bool IsDupDxilOpType(llvm::StructType *ST); - static llvm::StructType *GetOriginalDxilOpType(llvm::StructType *ST, - llvm::Module &M); static void GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, unsigned &major, unsigned &minor, unsigned &mask); @@ -141,6 +160,13 @@ class OP { unsigned valMinor, unsigned &major, unsigned &minor, unsigned &mask); + static bool IsDxilOpExtendedOverload(OpCode C); + + // Return true if the overload name suffix for this operation may be + // constructed based on a user-defined or user-influenced type name + // that may not represent the same type in different linked modules. + static bool MayHaveNonCanonicalOverload(OpCode OC); + private: // Per-module properties. llvm::LLVMContext &m_Ctx; @@ -164,13 +190,33 @@ class OP { DXIL::LowPrecisionMode m_LowPrecisionMode; - static const unsigned kUserDefineTypeSlot = 9; - static const unsigned kObjectTypeSlot = 10; - static const unsigned kNumTypeOverloads = - 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj + // Overload types are split into "basic" overload types and special types + // Basic: void, half, float, double, i1, i8, i16, i32, i64 + // - These have one canonical overload per TypeSlot + // Special: udt, obj, vec, extended + // - These may have many overloads per type slot + enum TypeSlot : unsigned { + TS_F16 = 0, + TS_F32 = 1, + TS_F64 = 2, + TS_I1 = 3, + TS_I8 = 4, + TS_I16 = 5, + TS_I32 = 6, + TS_I64 = 7, + TS_BasicCount, + TS_UDT = 8, // Ex: %"struct.MyStruct" * + TS_Object = 9, // Ex: %"class.StructuredBuffer" + TS_Vector = 10, // Ex: <8 x i16> + TS_MaskBitCount, // Types used in Mask end here + // TS_Extended is only used to identify the unnamed struct type used to wrap + // multiple overloads when using GetTypeSlot. 
+ TS_Extended, // Ex: type { float, <16 x i32> } + TS_Invalid = UINT_MAX, + }; - llvm::Type *m_pResRetType[kNumTypeOverloads]; - llvm::Type *m_pCBufferRetType[kNumTypeOverloads]; + llvm::Type *m_pResRetType[TS_BasicCount]; + llvm::Type *m_pCBufferRetType[TS_BasicCount]; struct OpCodeCacheItem { llvm::SmallMapVector pOverloads; @@ -181,27 +227,46 @@ class OP { private: // Static properties. + struct OverloadMask { + // mask of type slot bits as (1 << TypeSlot) + uint16_t SlotMask; + static_assert(TS_MaskBitCount <= (sizeof(SlotMask) * 8)); + bool operator[](unsigned TypeSlot) const { + return (TypeSlot < TS_MaskBitCount) ? (bool)(SlotMask & (1 << TypeSlot)) + : 0; + } + operator bool() const { return SlotMask != 0; } + }; struct OpCodeProperty { OpCode opCode; const char *pOpCodeName; OpCodeClass opCodeClass; const char *pOpCodeClassName; - bool bAllowOverload[kNumTypeOverloads]; // void, h,f,d, i1, i8,i16,i32,i64, - // udt llvm::Attribute::AttrKind FuncAttr; + + // Number of overload dimensions used by the operation. + unsigned int NumOverloadDims; + + // Mask of supported overload types for each overload dimension. + OverloadMask AllowedOverloads[DXIL::kDxilMaxOloadDims]; + + // Mask of scalar components allowed for each demension where + // AllowedOverloads[n][TS_Vector] is true. + OverloadMask AllowedVectorElements[DXIL::kDxilMaxOloadDims]; }; static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes]; - static const char *m_OverloadTypeName[kNumTypeOverloads]; + static const char *m_OverloadTypeName[TS_BasicCount]; static const char *m_NamePrefix; static const char *m_TypePrefix; static const char *m_MatrixTypePrefix; static unsigned GetTypeSlot(llvm::Type *pType); static const char *GetOverloadTypeName(unsigned TypeSlot); - static llvm::StringRef GetTypeName(llvm::Type *Ty, std::string &str); - static llvm::StringRef ConstructOverloadName(llvm::Type *Ty, - DXIL::OpCode opCode, - std::string &funcNameStorage); + static llvm::StringRef GetTypeName(llvm::Type *Ty, + llvm::SmallVectorImpl &Storage); + static llvm::StringRef + ConstructOverloadName(llvm::Type *Ty, DXIL::OpCode opCode, + llvm::SmallVectorImpl &Storage); }; } // namespace hlsl diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 86049fee9c..56cdd0d04f 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -23,8 +23,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -using std::string; -using std::vector; namespace hlsl { @@ -41,2989 +39,2605 @@ import hctdb_instrhelp /* hctdb_instrhelp.get_oloads_props()*/ // OPCODE-OLOADS:BEGIN const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { - // OpCode OpCode name, OpCodeClass - // OpCodeClass name, void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj, function attribute - // Temporary, indexable, input, output registers void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::TempRegLoad, - "TempRegLoad", - OCC::TempRegLoad, - "tempRegLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TempRegStore, - "TempRegStore", - OCC::TempRegStore, - "tempRegStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::MinPrecXRegLoad, - "MinPrecXRegLoad", - OCC::MinPrecXRegLoad, - "minPrecXRegLoad", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::MinPrecXRegStore, - 
"MinPrecXRegStore", - OCC::MinPrecXRegStore, - "minPrecXRegStore", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::None, - }, - { - OC::LoadInput, - "LoadInput", - OCC::LoadInput, - "loadInput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StoreOutput, - "StoreOutput", - OCC::StoreOutput, - "storeOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Unary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FAbs, - "FAbs", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Saturate, - "Saturate", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNaN, - "IsNaN", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsInf, - "IsInf", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsFinite, - "IsFinite", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNormal, - "IsNormal", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Cos, - "Cos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sin, - "Sin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Tan, - "Tan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Acos, - "Acos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Asin, - "Asin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Atan, - "Atan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hcos, - "Hcos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hsin, - "Hsin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Htan, - "Htan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Exp, - "Exp", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Frc, - "Frc", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Log, - "Log", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - 
false}, - Attribute::ReadNone, - }, - { - OC::Sqrt, - "Sqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Rsqrt, - "Rsqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary float - rounding void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Round_ne, - "Round_ne", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_ni, - "Round_ni", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_pi, - "Round_pi", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_z, - "Round_z", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfrev, - "Bfrev", - OCC::Unary, - "unary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Countbits, - "Countbits", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::FirstbitLo, - "FirstbitLo", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitHi, - "FirstbitHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitSHi, - "FirstbitSHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FMax, - "FMax", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FMin, - "FMin", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMax, - "IMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::IMin, - "IMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMax, - "UMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::UMin, - "UMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary int with two 
outputs void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IMul, - "IMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with two outputs void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UMul, - "UMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::UDiv, - "UDiv", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with carry or borrow void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UAddc, - "UAddc", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::USubb, - "USubb", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary float void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::FMad, - "FMad", - OCC::Tertiary, - "tertiary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Fma, - "Fma", - OCC::Tertiary, - "tertiary", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMad, - "IMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMad, - "UMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Msad, - "Msad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Ibfe, - "Ibfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Ubfe, - "Ubfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Quaternary void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfi, - "Bfi", - OCC::Quaternary, - "quaternary", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::Dot2, - "Dot2", - OCC::Dot2, - "dot2", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot3, - "Dot3", - OCC::Dot3, - "dot3", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - 
OC::Dot4, - "Dot4", - OCC::Dot4, - "dot4", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CreateHandle, - "CreateHandle", - OCC::CreateHandle, - "createHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoad, - "CBufferLoad", - OCC::CBufferLoad, - "cbufferLoad", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoadLegacy, - "CBufferLoadLegacy", - OCC::CBufferLoadLegacy, - "cbufferLoadLegacy", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Sample, - "Sample", - OCC::Sample, - "sample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleBias, - "SampleBias", - OCC::SampleBias, - "sampleBias", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleLevel, - "SampleLevel", - OCC::SampleLevel, - "sampleLevel", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleGrad, - "SampleGrad", - OCC::SampleGrad, - "sampleGrad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmp, - "SampleCmp", - OCC::SampleCmp, - "sampleCmp", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpLevelZero, - "SampleCmpLevelZero", - OCC::SampleCmpLevelZero, - "sampleCmpLevelZero", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureLoad, - "TextureLoad", - OCC::TextureLoad, - "textureLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureStore, - "TextureStore", - OCC::TextureStore, - "textureStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferLoad, - "BufferLoad", - OCC::BufferLoad, - "bufferLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::BufferStore, - "BufferStore", - OCC::BufferStore, - "bufferStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferUpdateCounter, - "BufferUpdateCounter", - OCC::BufferUpdateCounter, - "bufferUpdateCounter", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CheckAccessFullyMapped, - "CheckAccessFullyMapped", - OCC::CheckAccessFullyMapped, - "checkAccessFullyMapped", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetDimensions, - "GetDimensions", - OCC::GetDimensions, - "getDimensions", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , 
function attribute - { - OC::TextureGather, - "TextureGather", - OCC::TextureGather, - "textureGather", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureGatherCmp, - "TextureGatherCmp", - OCC::TextureGatherCmp, - "textureGatherCmp", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Texture2DMSGetSamplePosition, - "Texture2DMSGetSamplePosition", - OCC::Texture2DMSGetSamplePosition, - "texture2DMSGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSamplePosition, - "RenderTargetGetSamplePosition", - OCC::RenderTargetGetSamplePosition, - "renderTargetGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSampleCount, - "RenderTargetGetSampleCount", - OCC::RenderTargetGetSampleCount, - "renderTargetGetSampleCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AtomicBinOp, - "AtomicBinOp", - OCC::AtomicBinOp, - "atomicBinOp", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::AtomicCompareExchange, - "AtomicCompareExchange", - OCC::AtomicCompareExchange, - "atomicCompareExchange", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::Barrier, - "Barrier", - OCC::Barrier, - "barrier", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CalculateLOD, - "CalculateLOD", - OCC::CalculateLOD, - "calculateLOD", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Discard, - "Discard", - OCC::Discard, - "discard", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DerivCoarseX, - "DerivCoarseX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivCoarseY, - "DerivCoarseY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineX, - "DerivFineX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineY, - "DerivFineY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::EvalSnapped, - "EvalSnapped", - OCC::EvalSnapped, - "evalSnapped", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, 
- }, - { - OC::EvalSampleIndex, - "EvalSampleIndex", - OCC::EvalSampleIndex, - "evalSampleIndex", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalCentroid, - "EvalCentroid", - OCC::EvalCentroid, - "evalCentroid", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SampleIndex, - "SampleIndex", - OCC::SampleIndex, - "sampleIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Coverage, - "Coverage", - OCC::Coverage, - "coverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InnerCoverage, - "InnerCoverage", - OCC::InnerCoverage, - "innerCoverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Compute/Mesh/Amplification/Node shader void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::ThreadId, - "ThreadId", - OCC::ThreadId, - "threadId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::GroupId, - "GroupId", - OCC::GroupId, - "groupId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ThreadIdInGroup, - "ThreadIdInGroup", - OCC::ThreadIdInGroup, - "threadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FlattenedThreadIdInGroup, - "FlattenedThreadIdInGroup", - OCC::FlattenedThreadIdInGroup, - "flattenedThreadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Geometry shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::EmitStream, - "EmitStream", - OCC::EmitStream, - "emitStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CutStream, - "CutStream", - OCC::CutStream, - "cutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitThenCutStream, - "EmitThenCutStream", - OCC::EmitThenCutStream, - "emitThenCutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GSInstanceID, - "GSInstanceID", - OCC::GSInstanceID, - "gsInstanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::MakeDouble, - "MakeDouble", - OCC::MakeDouble, - "makeDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SplitDouble, - "SplitDouble", - OCC::SplitDouble, - "splitDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain and hull shader void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LoadOutputControlPoint, - "LoadOutputControlPoint", - OCC::LoadOutputControlPoint, - "loadOutputControlPoint", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LoadPatchConstant, - 
"LoadPatchConstant", - OCC::LoadPatchConstant, - "loadPatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DomainLocation, - "DomainLocation", - OCC::DomainLocation, - "domainLocation", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::StorePatchConstant, - "StorePatchConstant", - OCC::StorePatchConstant, - "storePatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::OutputControlPointID, - "OutputControlPointID", - OCC::OutputControlPointID, - "outputControlPointID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull, Domain and Geometry shaders void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveID, - "PrimitiveID", - OCC::PrimitiveID, - "primitiveID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Other void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::CycleCounterLegacy, - "CycleCounterLegacy", - OCC::CycleCounterLegacy, - "cycleCounterLegacy", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveIsFirstLane, - "WaveIsFirstLane", - OCC::WaveIsFirstLane, - "waveIsFirstLane", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveGetLaneIndex, - "WaveGetLaneIndex", - OCC::WaveGetLaneIndex, - "waveGetLaneIndex", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::WaveGetLaneCount, - "WaveGetLaneCount", - OCC::WaveGetLaneCount, - "waveGetLaneCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WaveAnyTrue, - "WaveAnyTrue", - OCC::WaveAnyTrue, - "waveAnyTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveAllTrue, - "WaveAllTrue", - OCC::WaveAllTrue, - "waveAllTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveActiveAllEqual, - "WaveActiveAllEqual", - OCC::WaveActiveAllEqual, - "waveActiveAllEqual", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBallot, - "WaveActiveBallot", - OCC::WaveActiveBallot, - "waveActiveBallot", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveReadLaneAt, - "WaveReadLaneAt", - OCC::WaveReadLaneAt, - "waveReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveReadLaneFirst, - "WaveReadLaneFirst", - OCC::WaveReadLaneFirst, - "waveReadLaneFirst", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveOp, - "WaveActiveOp", - OCC::WaveActiveOp, - "waveActiveOp", - {false, true, true, true, 
true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBit, - "WaveActiveBit", - OCC::WaveActiveBit, - "waveActiveBit", - {false, false, false, false, false, true, true, true, true, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixOp, - "WavePrefixOp", - OCC::WavePrefixOp, - "wavePrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadReadLaneAt, - "QuadReadLaneAt", - OCC::QuadReadLaneAt, - "quadReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::QuadOp, - "QuadOp", - OCC::QuadOp, - "quadOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Bitcasts with different sizes void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::BitcastI16toF16, - "BitcastI16toF16", - OCC::BitcastI16toF16, - "bitcastI16toF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF16toI16, - "BitcastF16toI16", - OCC::BitcastF16toI16, - "bitcastF16toI16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI32toF32, - "BitcastI32toF32", - OCC::BitcastI32toF32, - "bitcastI32toF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF32toI32, - "BitcastF32toI32", - OCC::BitcastF32toI32, - "bitcastF32toI32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI64toF64, - "BitcastI64toF64", - OCC::BitcastI64toF64, - "bitcastI64toF64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF64toI64, - "BitcastF64toI64", - OCC::BitcastF64toI64, - "bitcastF64toI64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Legacy floating-point void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LegacyF32ToF16, - "LegacyF32ToF16", - OCC::LegacyF32ToF16, - "legacyF32ToF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyF16ToF32, - "LegacyF16ToF32", - OCC::LegacyF16ToF32, - "legacyF16ToF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::LegacyDoubleToFloat, - "LegacyDoubleToFloat", - OCC::LegacyDoubleToFloat, - "legacyDoubleToFloat", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToSInt32, - "LegacyDoubleToSInt32", - OCC::LegacyDoubleToSInt32, - "legacyDoubleToSInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToUInt32, - "LegacyDoubleToUInt32", - OCC::LegacyDoubleToUInt32, - "legacyDoubleToUInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - 
OC::WaveAllBitCount, - "WaveAllBitCount", - OCC::WaveAllOp, - "waveAllOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixBitCount, - "WavePrefixBitCount", - OCC::WavePrefixOp, - "wavePrefixOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::AttributeAtVertex, - "AttributeAtVertex", - OCC::AttributeAtVertex, - "attributeAtVertex", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Graphics shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ViewID, - "ViewID", - OCC::ViewID, - "viewID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::RawBufferLoad, - "RawBufferLoad", - OCC::RawBufferLoad, - "rawBufferLoad", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::RawBufferStore, - "RawBufferStore", - OCC::RawBufferStore, - "rawBufferStore", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::None, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::InstanceID, - "InstanceID", - OCC::InstanceID, - "instanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InstanceIndex, - "InstanceIndex", - OCC::InstanceIndex, - "instanceIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing hit uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::HitKind, - "HitKind", - OCC::HitKind, - "hitKind", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::RayFlags, - "RayFlags", - OCC::RayFlags, - "rayFlags", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Dispatch Arguments void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::DispatchRaysIndex, - "DispatchRaysIndex", - OCC::DispatchRaysIndex, - "dispatchRaysIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DispatchRaysDimensions, - "DispatchRaysDimensions", - OCC::DispatchRaysDimensions, - "dispatchRaysDimensions", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Vectors void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::WorldRayOrigin, - "WorldRayOrigin", - OCC::WorldRayOrigin, - "worldRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldRayDirection, - "WorldRayDirection", - OCC::WorldRayDirection, - "worldRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray object space 
Vectors void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::ObjectRayOrigin, - "ObjectRayOrigin", - OCC::ObjectRayOrigin, - "objectRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ObjectRayDirection, - "ObjectRayDirection", - OCC::ObjectRayDirection, - "objectRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Transforms void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ObjectToWorld, - "ObjectToWorld", - OCC::ObjectToWorld, - "objectToWorld", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldToObject, - "WorldToObject", - OCC::WorldToObject, - "worldToObject", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // RayT void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::RayTMin, - "RayTMin", - OCC::RayTMin, - "rayTMin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::RayTCurrent, - "RayTCurrent", - OCC::RayTCurrent, - "rayTCurrent", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // AnyHit Terminals void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::IgnoreHit, - "IgnoreHit", - OCC::IgnoreHit, - "ignoreHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - { - OC::AcceptHitAndEndSearch, - "AcceptHitAndEndSearch", - OCC::AcceptHitAndEndSearch, - "acceptHitAndEndSearch", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - - // Indirect Shader Invocation void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::TraceRay, - "TraceRay", - OCC::TraceRay, - "traceRay", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::ReportHit, - "ReportHit", - OCC::ReportHit, - "reportHit", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::CallShader, - "CallShader", - OCC::CallShader, - "callShader", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Library create handle from resource struct (like HL intrinsic) void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::CreateHandleForLib, - "CreateHandleForLib", - OCC::CreateHandleForLib, - "createHandleForLib", - {false, false, false, false, false, false, false, false, false, false, - true}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveIndex, - "PrimitiveIndex", - OCC::PrimitiveIndex, - "primitiveIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot product with accumulate void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Dot2AddHalf, - "Dot2AddHalf", - OCC::Dot2AddHalf, - "dot2AddHalf", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - 
OC::Dot4AddI8Packed, - "Dot4AddI8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddU8Packed, - "Dot4AddU8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveMatch, - "WaveMatch", - OCC::WaveMatch, - "waveMatch", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixOp, - "WaveMultiPrefixOp", - OCC::WaveMultiPrefixOp, - "waveMultiPrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixBitCount, - "WaveMultiPrefixBitCount", - OCC::WaveMultiPrefixBitCount, - "waveMultiPrefixBitCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Mesh shader instructions void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::SetMeshOutputCounts, - "SetMeshOutputCounts", - OCC::SetMeshOutputCounts, - "setMeshOutputCounts", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitIndices, - "EmitIndices", - OCC::EmitIndices, - "emitIndices", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetMeshPayload, - "GetMeshPayload", - OCC::GetMeshPayload, - "getMeshPayload", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadOnly, - }, - { - OC::StoreVertexOutput, - "StoreVertexOutput", - OCC::StoreVertexOutput, - "storeVertexOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::StorePrimitiveOutput, - "StorePrimitiveOutput", - OCC::StorePrimitiveOutput, - "storePrimitiveOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Amplification shader instructions void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::DispatchMesh, - "DispatchMesh", - OCC::DispatchMesh, - "dispatchMesh", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Sampler Feedback void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::WriteSamplerFeedback, - "WriteSamplerFeedback", - OCC::WriteSamplerFeedback, - "writeSamplerFeedback", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackBias, - "WriteSamplerFeedbackBias", - OCC::WriteSamplerFeedbackBias, - "writeSamplerFeedbackBias", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackLevel, - "WriteSamplerFeedbackLevel", - OCC::WriteSamplerFeedbackLevel, - "writeSamplerFeedbackLevel", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackGrad, - "WriteSamplerFeedbackGrad", - OCC::WriteSamplerFeedbackGrad, - "writeSamplerFeedbackGrad", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Inline Ray Query void, h, 
f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery, - "AllocateRayQuery", - OCC::AllocateRayQuery, - "allocateRayQuery", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_TraceRayInline, - "RayQuery_TraceRayInline", - OCC::RayQuery_TraceRayInline, - "rayQuery_TraceRayInline", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Proceed, - "RayQuery_Proceed", - OCC::RayQuery_Proceed, - "rayQuery_Proceed", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Abort, - "RayQuery_Abort", - OCC::RayQuery_Abort, - "rayQuery_Abort", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitNonOpaqueTriangleHit, - "RayQuery_CommitNonOpaqueTriangleHit", - OCC::RayQuery_CommitNonOpaqueTriangleHit, - "rayQuery_CommitNonOpaqueTriangleHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitProceduralPrimitiveHit, - "RayQuery_CommitProceduralPrimitiveHit", - OCC::RayQuery_CommitProceduralPrimitiveHit, - "rayQuery_CommitProceduralPrimitiveHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommittedStatus, - "RayQuery_CommittedStatus", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateType, - "RayQuery_CandidateType", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectToWorld3x4, - "RayQuery_CandidateObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateWorldToObject3x4, - "RayQuery_CandidateWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectToWorld3x4, - "RayQuery_CommittedObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedWorldToObject3x4, - "RayQuery_CommittedWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, - "RayQuery_CandidateProceduralPrimitiveNonOpaque", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleFrontFace, - "RayQuery_CandidateTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleFrontFace, - "RayQuery_CommittedTriangleFrontFace", - 
OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleBarycentrics, - "RayQuery_CandidateTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleBarycentrics, - "RayQuery_CommittedTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayFlags, - "RayQuery_RayFlags", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayOrigin, - "RayQuery_WorldRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayDirection, - "RayQuery_WorldRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayTMin, - "RayQuery_RayTMin", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleRayT, - "RayQuery_CandidateTriangleRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedRayT, - "RayQuery_CommittedRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceIndex, - "RayQuery_CandidateInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceID, - "RayQuery_CandidateInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateGeometryIndex, - "RayQuery_CandidateGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidatePrimitiveIndex, - "RayQuery_CandidatePrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayOrigin, - "RayQuery_CandidateObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayDirection, - "RayQuery_CandidateObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - 
OC::RayQuery_CommittedInstanceIndex, - "RayQuery_CommittedInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceID, - "RayQuery_CommittedInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedGeometryIndex, - "RayQuery_CommittedGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedPrimitiveIndex, - "RayQuery_CommittedPrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayOrigin, - "RayQuery_CommittedObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayDirection, - "RayQuery_CommittedObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values, raytracing tier 1.1 void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::GeometryIndex, - "GeometryIndex", - OCC::GeometryIndex, - "geometryIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, - "RayQuery_CandidateInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, - "RayQuery_CommittedInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Get handle from heap void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AnnotateHandle, - "AnnotateHandle", - OCC::AnnotateHandle, - "annotateHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromBinding, - "CreateHandleFromBinding", - OCC::CreateHandleFromBinding, - "createHandleFromBinding", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromHeap, - "CreateHandleFromHeap", - OCC::CreateHandleFromHeap, - "createHandleFromHeap", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unpacking intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Unpack4x8, - "Unpack4x8", - OCC::Unpack4x8, - "unpack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Packing 
intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Pack4x8, - "Pack4x8", - OCC::Pack4x8, - "pack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Helper Lanes void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IsHelperLane, - "IsHelperLane", - OCC::IsHelperLane, - "isHelperLane", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadVote, - "QuadVote", - OCC::QuadVote, - "quadVote", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGatherRaw, - "TextureGatherRaw", - OCC::TextureGatherRaw, - "textureGatherRaw", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpLevel, - "SampleCmpLevel", - OCC::SampleCmpLevel, - "sampleCmpLevel", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureStoreSample, - "TextureStoreSample", - OCC::TextureStoreSample, - "textureStoreSample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::Reserved0, - "Reserved0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved1, - "Reserved1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved2, - "Reserved2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved3, - "Reserved3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved4, - "Reserved4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved5, - "Reserved5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved6, - "Reserved6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved7, - "Reserved7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved8, - "Reserved8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved9, - "Reserved9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved10, - "Reserved10", - 
OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved11, - "Reserved11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::AllocateNodeOutputRecords, - "AllocateNodeOutputRecords", - OCC::AllocateNodeOutputRecords, - "allocateNodeOutputRecords", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Get Pointer to Node Record in Address Space 6 void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::GetNodeRecordPtr, - "GetNodeRecordPtr", - OCC::GetNodeRecordPtr, - "getNodeRecordPtr", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IncrementOutputCount, - "IncrementOutputCount", - OCC::IncrementOutputCount, - "incrementOutputCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::OutputComplete, - "OutputComplete", - OCC::OutputComplete, - "outputComplete", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetInputRecordCount, - "GetInputRecordCount", - OCC::GetInputRecordCount, - "getInputRecordCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::FinishedCrossGroupSharing, - "FinishedCrossGroupSharing", - OCC::FinishedCrossGroupSharing, - "finishedCrossGroupSharing", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::BarrierByMemoryType, - "BarrierByMemoryType", - OCC::BarrierByMemoryType, - "barrierByMemoryType", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByMemoryHandle, - "BarrierByMemoryHandle", - OCC::BarrierByMemoryHandle, - "barrierByMemoryHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByNodeRecordHandle, - "BarrierByNodeRecordHandle", - OCC::BarrierByNodeRecordHandle, - "barrierByNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::CreateNodeOutputHandle, - "CreateNodeOutputHandle", - OCC::createNodeOutputHandle, - "createNodeOutputHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IndexNodeHandle, - "IndexNodeHandle", - OCC::IndexNodeHandle, - "indexNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeHandle, - "AnnotateNodeHandle", - OCC::AnnotateNodeHandle, - "annotateNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - 
OC::CreateNodeInputRecordHandle, - "CreateNodeInputRecordHandle", - OCC::CreateNodeInputRecordHandle, - "createNodeInputRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeRecordHandle, - "AnnotateNodeRecordHandle", - OCC::AnnotateNodeRecordHandle, - "annotateNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::NodeOutputIsValid, - "NodeOutputIsValid", - OCC::NodeOutputIsValid, - "nodeOutputIsValid", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetRemainingRecursionLevels, - "GetRemainingRecursionLevels", - OCC::GetRemainingRecursionLevels, - "getRemainingRecursionLevels", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Comparison Samples void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpGrad, - "SampleCmpGrad", - OCC::SampleCmpGrad, - "sampleCmpGrad", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpBias, - "SampleCmpBias", - OCC::SampleCmpBias, - "sampleCmpBias", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Extended Command Information void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::StartVertexLocation, - "StartVertexLocation", - OCC::StartVertexLocation, - "startVertexLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StartInstanceLocation, - "StartInstanceLocation", - OCC::StartInstanceLocation, - "startInstanceLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery2, - "AllocateRayQuery2", - OCC::AllocateRayQuery2, - "allocateRayQuery2", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedA0, - "ReservedA0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA1, - "ReservedA1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA2, - "ReservedA2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Shader Execution Reordering 
void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::HitObject_MakeMiss, - "HitObject_MakeMiss", - OCC::HitObject_MakeMiss, - "hitObject_MakeMiss", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::HitObject_MakeNop, - "HitObject_MakeNop", - OCC::HitObject_MakeNop, - "hitObject_MakeNop", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB13, - "ReservedB13", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB14, - "ReservedB14", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB16, - "ReservedB16", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - 
Attribute::None, - }, - { - OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB28, - "ReservedB28", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB29, - "ReservedB29", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB30, - "ReservedB30", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC0, - "ReservedC0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC1, - "ReservedC1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC2, - "ReservedC2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC3, - "ReservedC3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC4, - "ReservedC4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC5, - "ReservedC5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC6, - "ReservedC6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC7, - "ReservedC7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC8, - "ReservedC8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC9, - "ReservedC9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // Temporary, indexable, input, output registers + {OC::TempRegLoad, + "TempRegLoad", + OCC::TempRegLoad, + "tempRegLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // 
Overloads: hfwi + {OC::TempRegStore, + "TempRegStore", + OCC::TempRegStore, + "tempRegStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::MinPrecXRegLoad, + "MinPrecXRegLoad", + OCC::MinPrecXRegLoad, + "minPrecXRegLoad", + Attribute::ReadOnly, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::MinPrecXRegStore, + "MinPrecXRegStore", + OCC::MinPrecXRegStore, + "minPrecXRegStore", + Attribute::None, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::LoadInput, + "LoadInput", + OCC::LoadInput, + "loadInput", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StoreOutput, + "StoreOutput", + OCC::StoreOutput, + "storeOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Unary float + {OC::FAbs, + "FAbs", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::Saturate, + "Saturate", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::IsNaN, + "IsNaN", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsInf, + "IsInf", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsFinite, + "IsFinite", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::IsNormal, + "IsNormal", + OCC::IsSpecialFloat, + "isSpecialFloat", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Cos, + "Cos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Sin, + "Sin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Tan, + "Tan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Acos, + "Acos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Asin, + "Asin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Atan, + "Atan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Hcos, + "Hcos", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Hsin, + "Hsin", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Htan, + "Htan", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Exp, + "Exp", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Frc, + "Frc", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Log, + "Log", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Sqrt, + "Sqrt", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Rsqrt, + "Rsqrt", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Unary float - rounding + {OC::Round_ne, + "Round_ne", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_ni, + "Round_ni", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_pi, + "Round_pi", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + 
{{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Round_z, + "Round_z", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Unary int + {OC::Bfrev, + "Bfrev", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::Countbits, + "Countbits", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::FirstbitLo, + "FirstbitLo", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Unary uint + {OC::FirstbitHi, + "FirstbitHi", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Unary int + {OC::FirstbitSHi, + "FirstbitSHi", + OCC::UnaryBits, + "unaryBits", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary float + {OC::FMax, + "FMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::FMin, + "FMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + + // Binary int + {OC::IMax, + "IMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::IMin, + "IMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary uint + {OC::UMax, + "UMax", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + {OC::UMin, + "UMin", + OCC::Binary, + "binary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Binary int with two outputs + {OC::IMul, + "IMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Binary uint with two outputs + {OC::UMul, + "UMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::UDiv, + "UDiv", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Binary uint with carry or borrow + {OC::UAddc, + "UAddc", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::USubb, + "USubb", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Tertiary float + {OC::FMad, + "FMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0x7}}, + {{0x0}}}, // Overloads: hfd + {OC::Fma, + "Fma", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + + // Tertiary int + {OC::IMad, + "IMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Tertiary uint + {OC::UMad, + "UMad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Tertiary int + {OC::Msad, + "Msad", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + {OC::Ibfe, + "Ibfe", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + + // Tertiary uint + {OC::Ubfe, + "Ubfe", + OCC::Tertiary, + "tertiary", + Attribute::ReadNone, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: il + + // Quaternary + {OC::Bfi, + "Bfi", + OCC::Quaternary, + "quaternary", + 
Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Dot + {OC::Dot2, + "Dot2", + OCC::Dot2, + "dot2", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Dot3, + "Dot3", + OCC::Dot3, + "dot3", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::Dot4, + "Dot4", + OCC::Dot4, + "dot4", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::CreateHandle, + "CreateHandle", + OCC::CreateHandle, + "createHandle", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::CBufferLoad, + "CBufferLoad", + OCC::CBufferLoad, + "cbufferLoad", + Attribute::ReadOnly, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + {OC::CBufferLoadLegacy, + "CBufferLoadLegacy", + OCC::CBufferLoadLegacy, + "cbufferLoadLegacy", + Attribute::ReadOnly, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfdwil + + // Resources - sample + {OC::Sample, + "Sample", + OCC::Sample, + "sample", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleBias, + "SampleBias", + OCC::SampleBias, + "sampleBias", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleLevel, + "SampleLevel", + OCC::SampleLevel, + "sampleLevel", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleGrad, + "SampleGrad", + OCC::SampleGrad, + "sampleGrad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::SampleCmp, + "SampleCmp", + OCC::SampleCmp, + "sampleCmp", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleCmpLevelZero, + "SampleCmpLevelZero", + OCC::SampleCmpLevelZero, + "sampleCmpLevelZero", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::TextureLoad, + "TextureLoad", + OCC::TextureLoad, + "textureLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TextureStore, + "TextureStore", + OCC::TextureStore, + "textureStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferLoad, + "BufferLoad", + OCC::BufferLoad, + "bufferLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferStore, + "BufferStore", + OCC::BufferStore, + "bufferStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::BufferUpdateCounter, + "BufferUpdateCounter", + OCC::BufferUpdateCounter, + "bufferUpdateCounter", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::CheckAccessFullyMapped, + "CheckAccessFullyMapped", + OCC::CheckAccessFullyMapped, + "checkAccessFullyMapped", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::GetDimensions, + "GetDimensions", + OCC::GetDimensions, + "getDimensions", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Resources - gather + {OC::TextureGather, + "TextureGather", + OCC::TextureGather, + "textureGather", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TextureGatherCmp, + "TextureGatherCmp", + OCC::TextureGatherCmp, + "textureGatherCmp", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Resources - sample + {OC::Texture2DMSGetSamplePosition, + "Texture2DMSGetSamplePosition", + OCC::Texture2DMSGetSamplePosition, + "texture2DMSGetSamplePosition", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::RenderTargetGetSamplePosition, + "RenderTargetGetSamplePosition", + OCC::RenderTargetGetSamplePosition, + 
"renderTargetGetSamplePosition", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::RenderTargetGetSampleCount, + "RenderTargetGetSampleCount", + OCC::RenderTargetGetSampleCount, + "renderTargetGetSampleCount", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Synchronization + {OC::AtomicBinOp, + "AtomicBinOp", + OCC::AtomicBinOp, + "atomicBinOp", + Attribute::None, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: li + {OC::AtomicCompareExchange, + "AtomicCompareExchange", + OCC::AtomicCompareExchange, + "atomicCompareExchange", + Attribute::None, + 1, + {{0xc0}}, + {{0x0}}}, // Overloads: li + {OC::Barrier, + "Barrier", + OCC::Barrier, + "barrier", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + + // Derivatives + {OC::CalculateLOD, + "CalculateLOD", + OCC::CalculateLOD, + "calculateLOD", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Pixel shader + {OC::Discard, + "Discard", + OCC::Discard, + "discard", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Derivatives + {OC::DerivCoarseX, + "DerivCoarseX", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivCoarseY, + "DerivCoarseY", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivFineX, + "DerivFineX", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::DerivFineY, + "DerivFineY", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Pixel shader + {OC::EvalSnapped, + "EvalSnapped", + OCC::EvalSnapped, + "evalSnapped", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::EvalSampleIndex, + "EvalSampleIndex", + OCC::EvalSampleIndex, + "evalSampleIndex", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::EvalCentroid, + "EvalCentroid", + OCC::EvalCentroid, + "evalCentroid", + Attribute::ReadNone, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleIndex, + "SampleIndex", + OCC::SampleIndex, + "sampleIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::Coverage, + "Coverage", + OCC::Coverage, + "coverage", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::InnerCoverage, + "InnerCoverage", + OCC::InnerCoverage, + "innerCoverage", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Compute/Mesh/Amplification/Node shader + {OC::ThreadId, + "ThreadId", + OCC::ThreadId, + "threadId", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::GroupId, + "GroupId", + OCC::GroupId, + "groupId", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::ThreadIdInGroup, + "ThreadIdInGroup", + OCC::ThreadIdInGroup, + "threadIdInGroup", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::FlattenedThreadIdInGroup, + "FlattenedThreadIdInGroup", + OCC::FlattenedThreadIdInGroup, + "flattenedThreadIdInGroup", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Geometry shader + {OC::EmitStream, + "EmitStream", + OCC::EmitStream, + "emitStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::CutStream, + "CutStream", + OCC::CutStream, + "cutStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::EmitThenCutStream, + "EmitThenCutStream", + OCC::EmitThenCutStream, + "emitThenCutStream", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GSInstanceID, + 
"GSInstanceID", + OCC::GSInstanceID, + "gsInstanceID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Double precision + {OC::MakeDouble, + "MakeDouble", + OCC::MakeDouble, + "makeDouble", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + {OC::SplitDouble, + "SplitDouble", + OCC::SplitDouble, + "splitDouble", + Attribute::ReadNone, + 1, + {{0x4}}, + {{0x0}}}, // Overloads: d + + // Domain and hull shader + {OC::LoadOutputControlPoint, + "LoadOutputControlPoint", + OCC::LoadOutputControlPoint, + "loadOutputControlPoint", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::LoadPatchConstant, + "LoadPatchConstant", + OCC::LoadPatchConstant, + "loadPatchConstant", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Domain shader + {OC::DomainLocation, + "DomainLocation", + OCC::DomainLocation, + "domainLocation", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Hull shader + {OC::StorePatchConstant, + "StorePatchConstant", + OCC::StorePatchConstant, + "storePatchConstant", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::OutputControlPointID, + "OutputControlPointID", + OCC::OutputControlPointID, + "outputControlPointID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Hull, Domain and Geometry shaders + {OC::PrimitiveID, + "PrimitiveID", + OCC::PrimitiveID, + "primitiveID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Other + {OC::CycleCounterLegacy, + "CycleCounterLegacy", + OCC::CycleCounterLegacy, + "cycleCounterLegacy", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Wave + {OC::WaveIsFirstLane, + "WaveIsFirstLane", + OCC::WaveIsFirstLane, + "waveIsFirstLane", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveGetLaneIndex, + "WaveGetLaneIndex", + OCC::WaveGetLaneIndex, + "waveGetLaneIndex", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::WaveGetLaneCount, + "WaveGetLaneCount", + OCC::WaveGetLaneCount, + "waveGetLaneCount", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::WaveAnyTrue, + "WaveAnyTrue", + OCC::WaveAnyTrue, + "waveAnyTrue", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveAllTrue, + "WaveAllTrue", + OCC::WaveAllTrue, + "waveAllTrue", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveActiveAllEqual, + "WaveActiveAllEqual", + OCC::WaveActiveAllEqual, + "waveActiveAllEqual", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveBallot, + "WaveActiveBallot", + OCC::WaveActiveBallot, + "waveActiveBallot", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WaveReadLaneAt, + "WaveReadLaneAt", + OCC::WaveReadLaneAt, + "waveReadLaneAt", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveReadLaneFirst, + "WaveReadLaneFirst", + OCC::WaveReadLaneFirst, + "waveReadLaneFirst", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveOp, + "WaveActiveOp", + OCC::WaveActiveOp, + "waveActiveOp", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::WaveActiveBit, + "WaveActiveBit", + OCC::WaveActiveBit, + "waveActiveBit", + Attribute::None, + 1, + {{0xf0}}, + {{0x0}}}, // Overloads: 8wil + {OC::WavePrefixOp, + "WavePrefixOp", + OCC::WavePrefixOp, + "wavePrefixOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + + // Quad Wave Ops + {OC::QuadReadLaneAt, + 
"QuadReadLaneAt", + OCC::QuadReadLaneAt, + "quadReadLaneAt", + Attribute::None, + 1, + {{0xff}}, + {{0x0}}}, // Overloads: hfd18wil + {OC::QuadOp, + "QuadOp", + OCC::QuadOp, + "quadOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + + // Bitcasts with different sizes + {OC::BitcastI16toF16, + "BitcastI16toF16", + OCC::BitcastI16toF16, + "bitcastI16toF16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF16toI16, + "BitcastF16toI16", + OCC::BitcastF16toI16, + "bitcastF16toI16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastI32toF32, + "BitcastI32toF32", + OCC::BitcastI32toF32, + "bitcastI32toF32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF32toI32, + "BitcastF32toI32", + OCC::BitcastF32toI32, + "bitcastF32toI32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastI64toF64, + "BitcastI64toF64", + OCC::BitcastI64toF64, + "bitcastI64toF64", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::BitcastF64toI64, + "BitcastF64toI64", + OCC::BitcastF64toI64, + "bitcastF64toI64", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Legacy floating-point + {OC::LegacyF32ToF16, + "LegacyF32ToF16", + OCC::LegacyF32ToF16, + "legacyF32ToF16", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyF16ToF32, + "LegacyF16ToF32", + OCC::LegacyF16ToF32, + "legacyF16ToF32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Double precision + {OC::LegacyDoubleToFloat, + "LegacyDoubleToFloat", + OCC::LegacyDoubleToFloat, + "legacyDoubleToFloat", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyDoubleToSInt32, + "LegacyDoubleToSInt32", + OCC::LegacyDoubleToSInt32, + "legacyDoubleToSInt32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::LegacyDoubleToUInt32, + "LegacyDoubleToUInt32", + OCC::LegacyDoubleToUInt32, + "legacyDoubleToUInt32", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Wave + {OC::WaveAllBitCount, + "WaveAllBitCount", + OCC::WaveAllOp, + "waveAllOp", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WavePrefixBitCount, + "WavePrefixBitCount", + OCC::WavePrefixOp, + "wavePrefixOp", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Pixel shader + {OC::AttributeAtVertex, + "AttributeAtVertex", + OCC::AttributeAtVertex, + "attributeAtVertex", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfiw + + // Graphics shader + {OC::ViewID, + "ViewID", + OCC::ViewID, + "viewID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Resources + {OC::RawBufferLoad, + "RawBufferLoad", + OCC::RawBufferLoad, + "rawBufferLoad", + Attribute::ReadOnly, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfwidl + {OC::RawBufferStore, + "RawBufferStore", + OCC::RawBufferStore, + "rawBufferStore", + Attribute::None, + 1, + {{0xe7}}, + {{0x0}}}, // Overloads: hfwidl + + // Raytracing object space uint System Values + {OC::InstanceID, + "InstanceID", + OCC::InstanceID, + "instanceID", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::InstanceIndex, + "InstanceIndex", + OCC::InstanceIndex, + "instanceIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Raytracing hit uint System Values + {OC::HitKind, + "HitKind", + OCC::HitKind, + "hitKind", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Raytracing uint System Values + {OC::RayFlags, + "RayFlags", + OCC::RayFlags, + 
"rayFlags", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Ray Dispatch Arguments + {OC::DispatchRaysIndex, + "DispatchRaysIndex", + OCC::DispatchRaysIndex, + "dispatchRaysIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::DispatchRaysDimensions, + "DispatchRaysDimensions", + OCC::DispatchRaysDimensions, + "dispatchRaysDimensions", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Ray Vectors + {OC::WorldRayOrigin, + "WorldRayOrigin", + OCC::WorldRayOrigin, + "worldRayOrigin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::WorldRayDirection, + "WorldRayDirection", + OCC::WorldRayDirection, + "worldRayDirection", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Ray object space Vectors + {OC::ObjectRayOrigin, + "ObjectRayOrigin", + OCC::ObjectRayOrigin, + "objectRayOrigin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::ObjectRayDirection, + "ObjectRayDirection", + OCC::ObjectRayDirection, + "objectRayDirection", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Ray Transforms + {OC::ObjectToWorld, + "ObjectToWorld", + OCC::ObjectToWorld, + "objectToWorld", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::WorldToObject, + "WorldToObject", + OCC::WorldToObject, + "worldToObject", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // RayT + {OC::RayTMin, + "RayTMin", + OCC::RayTMin, + "rayTMin", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayTCurrent, + "RayTCurrent", + OCC::RayTCurrent, + "rayTCurrent", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // AnyHit Terminals + {OC::IgnoreHit, + "IgnoreHit", + OCC::IgnoreHit, + "ignoreHit", + Attribute::NoReturn, + 0, + {}, + {}}, // Overloads: v + {OC::AcceptHitAndEndSearch, + "AcceptHitAndEndSearch", + OCC::AcceptHitAndEndSearch, + "acceptHitAndEndSearch", + Attribute::NoReturn, + 0, + {}, + {}}, // Overloads: v + + // Indirect Shader Invocation + {OC::TraceRay, + "TraceRay", + OCC::TraceRay, + "traceRay", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::ReportHit, + "ReportHit", + OCC::ReportHit, + "reportHit", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::CallShader, + "CallShader", + OCC::CallShader, + "callShader", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Library create handle from resource struct (like HL intrinsic) + {OC::CreateHandleForLib, + "CreateHandleForLib", + OCC::CreateHandleForLib, + "createHandleForLib", + Attribute::ReadOnly, + 1, + {{0x200}}, + {{0x0}}}, // Overloads: o + + // Raytracing object space uint System Values + {OC::PrimitiveIndex, + "PrimitiveIndex", + OCC::PrimitiveIndex, + "primitiveIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Dot product with accumulate + {OC::Dot2AddHalf, + "Dot2AddHalf", + OCC::Dot2AddHalf, + "dot2AddHalf", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::Dot4AddI8Packed, + "Dot4AddI8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::Dot4AddU8Packed, + "Dot4AddU8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Wave + {OC::WaveMatch, + "WaveMatch", + OCC::WaveMatch, + "waveMatch", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, 
// Overloads: hfd8wil + {OC::WaveMultiPrefixOp, + "WaveMultiPrefixOp", + OCC::WaveMultiPrefixOp, + "waveMultiPrefixOp", + Attribute::None, + 1, + {{0xf7}}, + {{0x0}}}, // Overloads: hfd8wil + {OC::WaveMultiPrefixBitCount, + "WaveMultiPrefixBitCount", + OCC::WaveMultiPrefixBitCount, + "waveMultiPrefixBitCount", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Mesh shader instructions + {OC::SetMeshOutputCounts, + "SetMeshOutputCounts", + OCC::SetMeshOutputCounts, + "setMeshOutputCounts", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::EmitIndices, + "EmitIndices", + OCC::EmitIndices, + "emitIndices", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GetMeshPayload, + "GetMeshPayload", + OCC::GetMeshPayload, + "getMeshPayload", + Attribute::ReadOnly, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::StoreVertexOutput, + "StoreVertexOutput", + OCC::StoreVertexOutput, + "storeVertexOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StorePrimitiveOutput, + "StorePrimitiveOutput", + OCC::StorePrimitiveOutput, + "storePrimitiveOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Amplification shader instructions + {OC::DispatchMesh, + "DispatchMesh", + OCC::DispatchMesh, + "dispatchMesh", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Sampler Feedback + {OC::WriteSamplerFeedback, + "WriteSamplerFeedback", + OCC::WriteSamplerFeedback, + "writeSamplerFeedback", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackBias, + "WriteSamplerFeedbackBias", + OCC::WriteSamplerFeedbackBias, + "writeSamplerFeedbackBias", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackLevel, + "WriteSamplerFeedbackLevel", + OCC::WriteSamplerFeedbackLevel, + "writeSamplerFeedbackLevel", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::WriteSamplerFeedbackGrad, + "WriteSamplerFeedbackGrad", + OCC::WriteSamplerFeedbackGrad, + "writeSamplerFeedbackGrad", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Inline Ray Query + {OC::AllocateRayQuery, + "AllocateRayQuery", + OCC::AllocateRayQuery, + "allocateRayQuery", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_TraceRayInline, + "RayQuery_TraceRayInline", + OCC::RayQuery_TraceRayInline, + "rayQuery_TraceRayInline", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_Proceed, + "RayQuery_Proceed", + OCC::RayQuery_Proceed, + "rayQuery_Proceed", + Attribute::None, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_Abort, + "RayQuery_Abort", + OCC::RayQuery_Abort, + "rayQuery_Abort", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommitNonOpaqueTriangleHit, + "RayQuery_CommitNonOpaqueTriangleHit", + OCC::RayQuery_CommitNonOpaqueTriangleHit, + "rayQuery_CommitNonOpaqueTriangleHit", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommitProceduralPrimitiveHit, + "RayQuery_CommitProceduralPrimitiveHit", + OCC::RayQuery_CommitProceduralPrimitiveHit, + "rayQuery_CommitProceduralPrimitiveHit", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::RayQuery_CommittedStatus, + "RayQuery_CommittedStatus", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateType, + "RayQuery_CandidateType", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: 
i + {OC::RayQuery_CandidateObjectToWorld3x4, + "RayQuery_CandidateObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateWorldToObject3x4, + "RayQuery_CandidateWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedObjectToWorld3x4, + "RayQuery_CommittedObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedWorldToObject3x4, + "RayQuery_CommittedWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, + "RayQuery_CandidateProceduralPrimitiveNonOpaque", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CandidateTriangleFrontFace, + "RayQuery_CandidateTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CommittedTriangleFrontFace, + "RayQuery_CommittedTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::RayQuery_CandidateTriangleBarycentrics, + "RayQuery_CandidateTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedTriangleBarycentrics, + "RayQuery_CommittedTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_RayFlags, + "RayQuery_RayFlags", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_WorldRayOrigin, + "RayQuery_WorldRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_WorldRayDirection, + "RayQuery_WorldRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_RayTMin, + "RayQuery_RayTMin", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateTriangleRayT, + "RayQuery_CandidateTriangleRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedRayT, + "RayQuery_CommittedRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateInstanceIndex, + "RayQuery_CandidateInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateInstanceID, + "RayQuery_CandidateInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateGeometryIndex, + "RayQuery_CandidateGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + 
{{0x0}}}, // Overloads: i + {OC::RayQuery_CandidatePrimitiveIndex, + "RayQuery_CandidatePrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CandidateObjectRayOrigin, + "RayQuery_CandidateObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CandidateObjectRayDirection, + "RayQuery_CandidateObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedInstanceIndex, + "RayQuery_CommittedInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedInstanceID, + "RayQuery_CommittedInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedGeometryIndex, + "RayQuery_CommittedGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedPrimitiveIndex, + "RayQuery_CommittedPrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedObjectRayOrigin, + "RayQuery_CommittedObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::RayQuery_CommittedObjectRayDirection, + "RayQuery_CommittedObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + Attribute::ReadOnly, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + + // Raytracing object space uint System Values, raytracing tier 1.1 + {OC::GeometryIndex, + "GeometryIndex", + OCC::GeometryIndex, + "geometryIndex", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Inline Ray Query + {OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, + "RayQuery_CandidateInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, + "RayQuery_CommittedInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + Attribute::ReadOnly, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Get handle from heap + {OC::AnnotateHandle, + "AnnotateHandle", + OCC::AnnotateHandle, + "annotateHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateHandleFromBinding, + "CreateHandleFromBinding", + OCC::CreateHandleFromBinding, + "createHandleFromBinding", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateHandleFromHeap, + "CreateHandleFromHeap", + OCC::CreateHandleFromHeap, + "createHandleFromHeap", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Unpacking intrinsics + {OC::Unpack4x8, + "Unpack4x8", + OCC::Unpack4x8, + "unpack4x8", + Attribute::ReadNone, + 1, + {{0x60}}, + {{0x0}}}, // Overloads: iw + + // Packing intrinsics + {OC::Pack4x8, + "Pack4x8", + OCC::Pack4x8, + "pack4x8", + Attribute::ReadNone, + 1, + {{0x60}}, + {{0x0}}}, // Overloads: iw + + // Helper Lanes + {OC::IsHelperLane, + "IsHelperLane", + OCC::IsHelperLane, + "isHelperLane", + Attribute::ReadOnly, + 1, + {{0x8}}, + 
{{0x0}}}, // Overloads: 1 + + // Quad Wave Ops + {OC::QuadVote, + "QuadVote", + OCC::QuadVote, + "quadVote", + Attribute::None, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + + // Resources - gather + {OC::TextureGatherRaw, + "TextureGatherRaw", + OCC::TextureGatherRaw, + "textureGatherRaw", + Attribute::ReadOnly, + 1, + {{0xe0}}, + {{0x0}}}, // Overloads: wil + + // Resources - sample + {OC::SampleCmpLevel, + "SampleCmpLevel", + OCC::SampleCmpLevel, + "sampleCmpLevel", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Resources + {OC::TextureStoreSample, + "TextureStoreSample", + OCC::TextureStoreSample, + "textureStoreSample", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + {OC::Reserved0, + "Reserved0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved1, + "Reserved1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved2, + "Reserved2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved3, + "Reserved3", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved4, + "Reserved4", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved5, + "Reserved5", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved6, + "Reserved6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved7, + "Reserved7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved8, + "Reserved8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved9, + "Reserved9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved10, + "Reserved10", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::Reserved11, + "Reserved11", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Create/Annotate Node Handles + {OC::AllocateNodeOutputRecords, + "AllocateNodeOutputRecords", + OCC::AllocateNodeOutputRecords, + "allocateNodeOutputRecords", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Get Pointer to Node Record in Address Space 6 + {OC::GetNodeRecordPtr, + "GetNodeRecordPtr", + OCC::GetNodeRecordPtr, + "getNodeRecordPtr", + Attribute::ReadNone, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + + // Work Graph intrinsics + {OC::IncrementOutputCount, + "IncrementOutputCount", + OCC::IncrementOutputCount, + "incrementOutputCount", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::OutputComplete, + "OutputComplete", + OCC::OutputComplete, + "outputComplete", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::GetInputRecordCount, + "GetInputRecordCount", + OCC::GetInputRecordCount, + "getInputRecordCount", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::FinishedCrossGroupSharing, + "FinishedCrossGroupSharing", + OCC::FinishedCrossGroupSharing, + "finishedCrossGroupSharing", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Synchronization + {OC::BarrierByMemoryType, + "BarrierByMemoryType", + OCC::BarrierByMemoryType, + "barrierByMemoryType", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + {OC::BarrierByMemoryHandle, + "BarrierByMemoryHandle", + OCC::BarrierByMemoryHandle, + "barrierByMemoryHandle", + 
Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + {OC::BarrierByNodeRecordHandle, + "BarrierByNodeRecordHandle", + OCC::BarrierByNodeRecordHandle, + "barrierByNodeRecordHandle", + Attribute::NoDuplicate, + 0, + {}, + {}}, // Overloads: v + + // Create/Annotate Node Handles + {OC::CreateNodeOutputHandle, + "CreateNodeOutputHandle", + OCC::createNodeOutputHandle, + "createNodeOutputHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::IndexNodeHandle, + "IndexNodeHandle", + OCC::IndexNodeHandle, + "indexNodeHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::AnnotateNodeHandle, + "AnnotateNodeHandle", + OCC::AnnotateNodeHandle, + "annotateNodeHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::CreateNodeInputRecordHandle, + "CreateNodeInputRecordHandle", + OCC::CreateNodeInputRecordHandle, + "createNodeInputRecordHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::AnnotateNodeRecordHandle, + "AnnotateNodeRecordHandle", + OCC::AnnotateNodeRecordHandle, + "annotateNodeRecordHandle", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + // Work Graph intrinsics + {OC::NodeOutputIsValid, + "NodeOutputIsValid", + OCC::NodeOutputIsValid, + "nodeOutputIsValid", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + {OC::GetRemainingRecursionLevels, + "GetRemainingRecursionLevels", + OCC::GetRemainingRecursionLevels, + "getRemainingRecursionLevels", + Attribute::ReadOnly, + 0, + {}, + {}}, // Overloads: v + + // Comparison Samples + {OC::SampleCmpGrad, + "SampleCmpGrad", + OCC::SampleCmpGrad, + "sampleCmpGrad", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + {OC::SampleCmpBias, + "SampleCmpBias", + OCC::SampleCmpBias, + "sampleCmpBias", + Attribute::ReadOnly, + 1, + {{0x3}}, + {{0x0}}}, // Overloads: hf + + // Extended Command Information + {OC::StartVertexLocation, + "StartVertexLocation", + OCC::StartVertexLocation, + "startVertexLocation", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::StartInstanceLocation, + "StartInstanceLocation", + OCC::StartInstanceLocation, + "startInstanceLocation", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + + // Inline Ray Query + {OC::AllocateRayQuery2, + "AllocateRayQuery2", + OCC::AllocateRayQuery2, + "allocateRayQuery2", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + {OC::ReservedA0, + "ReservedA0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedA1, + "ReservedA1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedA2, + "ReservedA2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB0, + "ReservedB0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB1, + "ReservedB1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB2, + "ReservedB2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + + // Shader Execution Reordering + {OC::HitObject_MakeMiss, + "HitObject_MakeMiss", + OCC::HitObject_MakeMiss, + "hitObject_MakeMiss", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + {OC::HitObject_MakeNop, + "HitObject_MakeNop", + OCC::HitObject_MakeNop, + "hitObject_MakeNop", + Attribute::ReadNone, + 0, + {}, + {}}, // Overloads: v + + {OC::ReservedB5, + "ReservedB5", + OCC::Reserved, + "reserved", + 
Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB6, + "ReservedB6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB7, + "ReservedB7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB8, + "ReservedB8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB9, + "ReservedB9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB10, + "ReservedB10", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB11, + "ReservedB11", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB12, + "ReservedB12", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB13, + "ReservedB13", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB14, + "ReservedB14", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB15, + "ReservedB15", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB16, + "ReservedB16", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB17, + "ReservedB17", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB18, + "ReservedB18", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB19, + "ReservedB19", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB20, + "ReservedB20", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB21, + "ReservedB21", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB22, + "ReservedB22", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB23, + "ReservedB23", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB24, + "ReservedB24", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB25, + "ReservedB25", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB26, + "ReservedB26", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB27, + "ReservedB27", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB28, + "ReservedB28", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB29, + "ReservedB29", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedB30, + "ReservedB30", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC0, + "ReservedC0", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC1, + "ReservedC1", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC2, + "ReservedC2", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC3, + "ReservedC3", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + 
{OC::ReservedC4, + "ReservedC4", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC5, + "ReservedC5", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC6, + "ReservedC6", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC7, + "ReservedC7", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC8, + "ReservedC8", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v + {OC::ReservedC9, + "ReservedC9", + OCC::Reserved, + "reserved", + Attribute::None, + 0, + {}, + {}}, // Overloads: v }; // OPCODE-OLOADS:END -const char *OP::m_OverloadTypeName[kNumTypeOverloads] = { - "void", "f16", "f32", "f64", "i1", "i8", - "i16", "i32", "i64", "udt", "obj", // These should not be used -}; +const char *OP::m_OverloadTypeName[TS_BasicCount] = { + "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64"}; const char *OP::m_NamePrefix = "dx.op."; const char *OP::m_TypePrefix = "dx.types."; @@ -3040,82 +2654,110 @@ unsigned OP::GetTypeSlot(Type *pType) { Type::TypeID T = pType->getTypeID(); switch (T) { case Type::VoidTyID: - return 0; + return TS_Invalid; case Type::HalfTyID: - return 1; + return TS_F16; case Type::FloatTyID: - return 2; + return TS_F32; case Type::DoubleTyID: - return 3; + return TS_F64; case Type::IntegerTyID: { IntegerType *pIT = dyn_cast(pType); unsigned Bits = pIT->getBitWidth(); switch (Bits) { case 1: - return 4; + return TS_I1; case 8: - return 5; + return TS_I8; case 16: - return 6; + return TS_I16; case 32: - return 7; + return TS_I32; case 64: - return 8; + return TS_I64; } llvm_unreachable("Invalid Bits size"); + return TS_Invalid; } case Type::PointerTyID: { pType = cast(pType)->getElementType(); if (pType->isStructTy()) - return kUserDefineTypeSlot; + return TS_UDT; DXASSERT(!pType->isPointerTy(), "pointer-to-pointer type unsupported"); return GetTypeSlot(pType); } case Type::StructTyID: - return kObjectTypeSlot; + // Named struct value (not pointer) indicates a built-in object type. + // Anonymous struct value is used to wrap multi-overload dimensions. 
+ if (cast(pType)->hasName()) + return TS_Object; + else + return TS_Extended; + case Type::VectorTyID: + return TS_Vector; default: break; } - return UINT_MAX; + return TS_Invalid; } const char *OP::GetOverloadTypeName(unsigned TypeSlot) { - DXASSERT(TypeSlot < kUserDefineTypeSlot, "otherwise caller passed OOB index"); + DXASSERT(TypeSlot < TS_BasicCount, "otherwise caller passed OOB index"); return m_OverloadTypeName[TypeSlot]; } -llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) { +StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { + DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); - if (TypeSlot < kUserDefineTypeSlot) { + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == kUserDefineTypeSlot) { + } else if (TypeSlot == TS_UDT) { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast(Ty); return ST->getStructName(); - } else if (TypeSlot == kObjectTypeSlot) { + } else if (TypeSlot == TS_Object) { StructType *ST = cast(Ty); return ST->getStructName(); + } else if (TypeSlot == TS_Vector) { + VectorType *VecTy = cast(Ty); + return (Twine("v") + Twine(VecTy->getNumElements()) + + Twine( + GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + } else if (TypeSlot == TS_Extended) { + DXASSERT(isa(Ty), + "otherwise, extended overload type not wrapped in struct type."); + StructType *ST = cast(Ty); + DXASSERT(ST->getNumElements() <= DXIL::kDxilMaxOloadDims, + "otherwise, extended overload has too many dimensions."); + // Iterate extended slots, recurse, separate with '.' + raw_svector_ostream OS(Storage); + for (unsigned I = 0; I < ST->getNumElements(); ++I) { + if (I > 0) + OS << "."; + SmallVector TempStr; + OS << GetTypeName(ST->getElementType(I), TempStr); + } + return OS.str(); } else { - raw_string_ostream os(str); - Ty->print(os); - os.flush(); - return str; + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } } -llvm::StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, - std::string &funcNameStorage) { +StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, + SmallVectorImpl &Storage) { if (Ty == Type::getVoidTy(Ty->getContext())) { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))).str(); + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))) + .toStringRef(Storage); } else { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + - GetTypeName(Ty, funcNameStorage)) - .str(); + llvm::SmallVector TempStr; + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + + GetTypeName(Ty, TempStr)) + .toStringRef(Storage); } - return funcNameStorage; } const char *OP::GetOpCodeName(OpCode opCode) { @@ -3143,13 +2785,41 @@ llvm::Attribute::AttrKind OP::GetMemAccessAttr(OpCode opCode) { } bool OP::IsOverloadLegal(OpCode opCode, Type *pType) { - if (!pType) + if (static_cast(opCode) >= + static_cast(OpCode::NumOpCodes)) return false; - if (opCode == OpCode::NumOpCodes) + if (!pType) return false; - unsigned TypeSlot = GetTypeSlot(pType); - return TypeSlot != UINT_MAX && - m_OpCodeProps[(unsigned)opCode].bAllowOverload[TypeSlot]; + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + + if (OpProps.NumOverloadDims == 0) + return pType->isVoidTy(); + + // Normalize 1+ overload dimensions into array. 
+ Type *Types[DXIL::kDxilMaxOloadDims] = {pType}; + if (OpProps.NumOverloadDims > 1) { + StructType *ST = dyn_cast(pType); + // Make sure multi-overload is well-formed. + if (!ST || ST->hasName() || ST->getNumElements() != OpProps.NumOverloadDims) + return false; + for (unsigned I = 0; I < ST->getNumElements(); ++I) + Types[I] = ST->getElementType(I); + } + + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) { + Type *Ty = Types[I]; + unsigned TypeSlot = GetTypeSlot(Ty); + if (!OpProps.AllowedOverloads[I][TypeSlot]) + return false; + if (TypeSlot == TS_Vector) { + unsigned EltTypeSlot = + GetTypeSlot(cast(Ty)->getElementType()); + if (!OpProps.AllowedVectorElements[I][EltTypeSlot]) + return false; + } + } + + return true; } bool OP::CheckOpCodeTable() { @@ -3173,41 +2843,6 @@ bool OP::IsDxilOpFunc(const llvm::Function *F) { return IsDxilOpFuncName(F->getName()); } -bool OP::IsDxilOpTypeName(StringRef name) { - return name.startswith(m_TypePrefix) || name.startswith(m_MatrixTypePrefix); -} - -bool OP::IsDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - return IsDxilOpTypeName(Name); -} - -bool OP::IsDupDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - if (!IsDxilOpTypeName(Name)) - return false; - size_t DotPos = Name.rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || Name.back() == '.' || - !isdigit(static_cast(Name[DotPos + 1]))) - return false; - return true; -} - -StructType *OP::GetOriginalDxilOpType(llvm::StructType *ST, llvm::Module &M) { - DXASSERT(IsDupDxilOpType(ST), "else should not call GetOriginalDxilOpType"); - StringRef Name = ST->getName(); - size_t DotPos = Name.rfind('.'); - StructType *OriginalST = M.getTypeByName(Name.substr(0, DotPos)); - DXASSERT(OriginalST, "else name collison without original type"); - DXASSERT(ST->isLayoutIdentical(OriginalST), - "else invalid layout for dxil types"); - return OriginalST; -} - bool OP::IsDxilOpFuncCallInst(const llvm::Instruction *I) { const CallInst *CI = dyn_cast(I); if (CI == nullptr) @@ -3297,6 +2932,12 @@ bool OP::IsDxilOpBarrier(OpCode C) { // OPCODE-BARRIER:END } +bool OP::IsDxilOpExtendedOverload(OpCode C) { + if (C >= OpCode::NumOpCodes) + return false; + return m_OpCodeProps[static_cast(C)].NumOverloadDims > 1; +} + static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags, unsigned allowedMask) { // If the memory type is AllMemory, masking inapplicable flags is allowed. 
@@ -3945,13 +3586,12 @@ void OP::FixOverloadNames() { if (F.isDeclaration() && OP::IsDxilOpFunc(&F) && !F.user_empty()) { CallInst *CI = cast(*F.user_begin()); DXIL::OpCode opCode = OP::GetDxilOpFuncCallInst(CI); + if (!MayHaveNonCanonicalOverload(opCode)) + continue; llvm::Type *Ty = OP::GetOverloadType(opCode, &F); if (!OP::IsOverloadLegal(opCode, Ty)) continue; - if (!isa(Ty) && !isa(Ty)) - continue; - - std::string funcName; + SmallVector funcName; if (OP::ConstructOverloadName(Ty, opCode, funcName) .compare(F.getName()) != 0) F.setName(funcName); @@ -3964,11 +3604,54 @@ void OP::UpdateCache(OpCodeClass opClass, Type *Ty, llvm::Function *F) { m_FunctionToOpClass[F] = opClass; } +bool OP::MayHaveNonCanonicalOverload(OpCode OC) { + if (OC >= OpCode::NumOpCodes) + return false; + const unsigned CheckMask = (1 << TS_UDT) | (1 << TS_Object); + auto &OpProps = m_OpCodeProps[static_cast(OC)]; + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) + if ((CheckMask & OpProps.AllowedOverloads[I].SlotMask) != 0) + return true; + return false; +} + +Function *OP::GetOpFunc(OpCode OC, ArrayRef OverloadTypes) { + if (OC >= OpCode::NumOpCodes) + return nullptr; + if (OverloadTypes.size() != + m_OpCodeProps[static_cast(OC)].NumOverloadDims) { + llvm_unreachable("incorrect overload dimensions"); + return nullptr; + } + if (OverloadTypes.size() == 0) { + return GetOpFunc(OC, Type::getVoidTy(m_Ctx)); + } else if (OverloadTypes.size() == 1) { + return GetOpFunc(OC, OverloadTypes[0]); + } + return GetOpFunc(OC, GetExtendedOverloadType(OverloadTypes)); +} + Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { - if (opCode == OpCode::NumOpCodes) + if (opCode >= OpCode::NumOpCodes) return nullptr; if (!pOverloadType) return nullptr; + + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + if (IsDxilOpExtendedOverload(opCode)) { + // Make sure pOverloadType is well formed for an extended overload. + StructType *ST = dyn_cast(pOverloadType); + DXASSERT(ST != nullptr, + "otherwise, extended overload type is not a struct"); + if (ST == nullptr) + return nullptr; + bool EltCountValid = ST->getNumElements() == OpProps.NumOverloadDims; + DXASSERT(EltCountValid, + "otherwise, incorrect type count for extended overload."); + if (!EltCountValid) + return nullptr; + } + // Illegal overloads are generated and eliminated by DXIL op constant // evaluation for a number of cases where a double overload of an HL intrinsic // that otherwise does not support double is used for literal values, when @@ -3976,7 +3659,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Illegal overloads of DXIL intrinsics may survive through to final DXIL, // but these will be caught by the validator, and this is not a regression. 
- OpCodeClass opClass = m_OpCodeProps[(unsigned)opCode].opCodeClass; + OpCodeClass opClass = OpProps.opCodeClass; Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[pOverloadType]; if (F != nullptr) { @@ -3984,7 +3667,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - vector ArgTypes; // RetType is ArgTypes[0] + SmallVector ArgTypes; // RetType is ArgTypes[0] Type *pETy = pOverloadType; Type *pRes = GetHandleType(); Type *pNodeHandle = GetNodeHandleType(); @@ -4020,7 +3703,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define A(_x) ArgTypes.emplace_back(_x) #define RRT(_y) A(GetResRetType(_y)) #define CBRT(_y) A(GetCBufferRetType(_y)) -#define VEC4(_y) A(GetVectorType(4, _y)) +#define VEC4(_y) A(GetStructVectorType(4, _y)) + +// Extended Overload types are wrapped in an anonymous struct +#define EXT(_y) A(cast(pOverloadType)->getElementType(_y)) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -6066,14 +5752,15 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { pFT = FunctionType::get( ArgTypes[0], ArrayRef(&ArgTypes[1], ArgTypes.size() - 1), false); - std::string funcName; - ConstructOverloadName(pOverloadType, opCode, funcName); + SmallVector FuncStorage; + StringRef FuncName = + ConstructOverloadName(pOverloadType, opCode, FuncStorage); // Try to find existing function with the same name in the module. // This needs to happen after the switch statement that constructs arguments // and return values to ensure that ResRetType is constructed in the // RefreshCache case. - if (Function *existF = m_pModule->getFunction(funcName)) { + if (Function *existF = m_pModule->getFunction(FuncName)) { if (existF->getFunctionType() != pFT) return nullptr; F = existF; @@ -6081,13 +5768,13 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - F = cast(m_pModule->getOrInsertFunction(funcName, pFT)); + F = cast(m_pModule->getOrInsertFunction(FuncName, pFT)); UpdateCache(opClass, pOverloadType, F); F->setCallingConv(CallingConv::C); F->addFnAttr(Attribute::NoUnwind); - if (m_OpCodeProps[(unsigned)opCode].FuncAttr != Attribute::None) - F->addFnAttr(m_OpCodeProps[(unsigned)opCode].FuncAttr); + if (OpProps.FuncAttr != Attribute::None) + F->addFnAttr(OpProps.FuncAttr); return F; } @@ -6494,62 +6181,91 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { + if (!Ty->isStructTy()) + return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) return true; } - return false; + // Check for vector overload which isn't cached in m_pResRetType. 
+ StructType *ST = cast(Ty); + if (!ST->hasName() || ST->getNumElements() < 2 || + !ST->getElementType(0)->isVectorTy()) + return false; + return Ty == GetResRetType(ST->getElementType(0)); } Type *OP::GetResRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); - if (m_pResRetType[TypeSlot] == nullptr) { - string TypeName("dx.types.ResRet."); - TypeName += GetOverloadTypeName(TypeSlot); - Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, - pOverloadType, Type::getInt32Ty(m_Ctx)}; - m_pResRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + if (TypeSlot < TS_BasicCount) { + if (m_pResRetType[TypeSlot] == nullptr) { + SmallVector Storage; + StringRef TypeName = + (Twine("dx.types.ResRet.") + Twine(GetOverloadTypeName(TypeSlot))) + .toStringRef(Storage); + Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, + pOverloadType, Type::getInt32Ty(m_Ctx)}; + m_pResRetType[TypeSlot] = + GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } + return m_pResRetType[TypeSlot]; + } else if (TypeSlot == TS_Vector) { + SmallVector Storage; + VectorType *VecTy = cast(pOverloadType); + StringRef TypeName = + (Twine("dx.types.ResRet.v") + Twine(VecTy->getNumElements()) + + Twine(GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + Type *FieldTypes[2] = {pOverloadType, Type::getInt32Ty(m_Ctx)}; + return GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); } - return m_pResRetType[TypeSlot]; + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; } Type *OP::GetCBufferRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); + if (TypeSlot >= TS_BasicCount) { + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; + } + if (m_pCBufferRetType[TypeSlot] == nullptr) { DXASSERT(m_LowPrecisionMode != DXIL::LowPrecisionMode::Undefined, "m_LowPrecisionMode must be set before constructing type."); - string TypeName("dx.types.CBufRet."); - TypeName += GetOverloadTypeName(TypeSlot); + SmallVector Storage; + raw_svector_ostream OS(Storage); + OS << "dx.types.CBufRet."; + OS << GetOverloadTypeName(TypeSlot); Type *i64Ty = Type::getInt64Ty(pOverloadType->getContext()); Type *i16Ty = Type::getInt16Ty(pOverloadType->getContext()); if (pOverloadType->isDoubleTy() || pOverloadType == i64Ty) { Type *FieldTypes[2] = {pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else if (!UseMinPrecision() && (pOverloadType->isHalfTy() || pOverloadType == i16Ty)) { - TypeName += ".8"; // dx.types.CBufRet.fp16.8 for buffer of 8 halves + OS << ".8"; // dx.types.CBufRet.f16.8 for buffer of 8 halves Type *FieldTypes[8] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, }; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else { Type *FieldTypes[4] = {pOverloadType, pOverloadType, pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } } return m_pCBufferRetType[TypeSlot]; } -Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { +Type 
*OP::GetStructVectorType(unsigned numElements, Type *pOverloadType) { if (numElements == 4) { if (pOverloadType == Type::getInt32Ty(pOverloadType->getContext())) { return m_pFourI32Type; @@ -6561,6 +6277,10 @@ Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { return nullptr; } +StructType *OP::GetExtendedOverloadType(ArrayRef OverloadTypes) { + return StructType::get(m_Ctx, OverloadTypes); +} + //------------------------------------------------------------------------------ // // LLVM utility methods. diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 4622256dfe..cac074adc3 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2037,7 +2037,7 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty->getScalarType()); + dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); } if (!dxilFunc) { @@ -2109,17 +2109,20 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { return true; unsigned EltNum = ST->getNumElements(); + Type *EltTy = ST->getElementType(0); switch (EltNum) { case 2: + // Check if it's a native vector resret. + if (EltTy->isVectorTy()) + return ST == hlslOP->GetResRetType(EltTy); + LLVM_FALLTHROUGH; case 4: - case 8: { // 2 for doubles, 8 for halfs. - Type *EltTy = ST->getElementType(0); + case 8: // 2 for doubles, 8 for halfs. return ST == hlslOP->GetCBufferRetType(EltTy); - } break; - case 5: { - Type *EltTy = ST->getElementType(0); + break; + case 5: return ST == hlslOP->GetResRetType(EltTy); - } break; + break; default: return false; } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index e32ab1915a..05bc7d472d 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -37,6 +37,30 @@ "array_local_ldst", ] +# These are the valid overload type characters for DXIL instructions. +# - "v" is for void, and can only be used alone. +# - "u" is for user defined type (UDT), and is mutually exclusive with the other +# types. +# - "o" is for an HLSL object type (e.g. Texture, Sampler, etc.), and is +# mutually exclusive with the other types. +# - "<" is for vector overloads, and may be followed by a set of supported +# component types. +# - If "<" is not followed by any component types, any preceding scalar types +# are used. +# - Vector component types are captured into a separate list during +# processing. +# - "," is used to separate multiple overload dimensions. +# - When used, only $x0, $x1, etc. are supported for overloaded parameter +# types. +# dxil_all_user_oload_chars must be kept in sync with the indices in +# hlsl::OP::TypeSlot in DxilOperations.h. +dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_scalar_oload_chars = "hfd18wil" + +# Maximum number of overload dimensions supported through the extended overload +# in DXIL instructions. +dxil_max_overload_dims = 2 + class db_dxil_enum_value(object): "A representation for a value in an enumeration type" @@ -81,6 +105,7 @@ def __init__(self, name, **kwargs): self.ops = [] # the operands that this instruction takes self.is_allowed = True # whether this instruction is allowed in a DXIL program self.oload_types = "" # overload types if applicable + # Always call process_oload_types() after setting oload_types. 
self.fn_attr = "" # attribute shorthands: rn=does not access memory,ro=only reads from memory, self.is_deriv = False # whether this is some kind of derivative self.is_gradient = False # whether this requires a gradient calculation @@ -98,6 +123,9 @@ def __init__(self, name, **kwargs): self.is_reserved = self.dxil_class == "Reserved" self.shader_model_translated = () # minimum shader model required with translation by linker self.props = {} # extra properties + self.num_oloads = 0 # number of overloads for this instruction + if self.is_dxil_op: + self.process_oload_types() def __str__(self): return self.name @@ -105,6 +133,127 @@ def __str__(self): def fully_qualified_name(self): return "{}::{}".format(self.fully_qualified_name_prefix, self.name) + def process_oload_types(self): + if type(self.oload_types) is not str: + raise ValueError( + f"overload for '{self.name}' should be a string - use empty if n/a" + ) + # Early out for LLVM instructions + if not self.is_dxil_op: + return + + self.num_oloads = 0 + + # Early out for void overloads. + if self.oload_types == "v": + return + + if self.oload_types == "": + raise ValueError( + f"overload for '{self.name}' should not be empty - use void if n/a" + ) + if "v" in self.oload_types: + raise ValueError( + f"void overload should be exclusive to other types for '({self.name})'" + ) + + # Process oload_types for extended and vector overloads. + # Contrived example: "hf<, dxil_max_overload_dims: + raise ValueError( + "Too many overload dimensions for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + + def check_duplicate_overloads(oloads): + if len(oloads) != len(set(oloads)): + raise ValueError( + "Duplicate overload types specified for DXIL op " + f"{self.name}: '{oloads}' in '{self.oload_types}'" + ) + + def check_overload_chars(oloads, valid_chars): + invalid_chars = set(oloads).difference(set(valid_chars)) + if invalid_chars: + raise ValueError( + "Invalid overload type character(s) used for DXIL op " + f"{self.name}: '{invalid_chars}' in '{oloads}' from " + f"'{self.oload_types}'" + ) + + for n, oloads in enumerate(oload_types): + if len(oloads) == 0: + raise ValueError( + f"Invalid empty overload type for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + check_overload_chars(oloads, dxil_all_user_oload_chars) + + # split at vector for component overloads, if vector specified + # without following components, use the scalar overloads that + # precede the vector character. + split = oloads.split("<") + if len(split) == 1: + # No vector overload. + continue + elif len(split) != 2: + raise ValueError( + f"Invalid vector overload for DXIL op {self.name}: " + f"{oloads} in '{self.oload_types}'" + ) + + # Split into scalar and vector component overloads. + scalars, vector_oloads = split + check_duplicate_overloads(scalars) + if not vector_oloads: + vector_oloads = scalars + else: + check_duplicate_overloads(vector_oloads) + if not vector_oloads: + raise ValueError( + "No scalar overload types provided with vector overload " + f"for DXIL op {self.name}: '{self.oload_types}'" + ) + check_overload_chars(vector_oloads, dxil_scalar_oload_chars) + oload_types[n] = scalars + "<" + vector_oloads + # Reconstruct overload string with default vector overloads. + self.oload_types = ",".join(oload_types) + self.check_extended_oload_ops() + + def check_extended_oload_ops(self): + "Ensure ops has sequential extended overload references with $x0, $x1, etc." 
+ if self.num_oloads < 2: + return + next_oload_idx = 0 + for i in self.ops: + if i.llvm_type.startswith("$x"): + if i.llvm_type != "$x" + str(next_oload_idx): + raise ValueError( + "Extended overloads are not sequentially referenced in " + f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" + ) + next_oload_idx += 1 + if next_oload_idx != self.num_oloads: + raise ValueError( + "Extended overloads are not referenced for all overload " + f"dimensions in DXIL op {self.name}: {next_oload_idx} != " + f"{self.num_oloads}" + ) + class db_dxil_metadata(object): "A representation for a metadata record" @@ -477,9 +626,7 @@ def populate_categories_and_models(self): "closesthit", ) for i in "GeometryIndex".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Raytracing object space uint System Values, raytracing tier 1.1" ) self.name_idx[i].shader_model = 6, 5 @@ -574,9 +721,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 3 self.name_idx[i].shader_stages = ("library", "intersection") for i in "CreateHandleForLib".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Library create handle from resource struct (like HL intrinsic)" ) self.name_idx[i].shader_model = 6, 3 @@ -5652,18 +5797,6 @@ def UFI(name, **mappings): ) for i in self.instr: self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) - for i in self.instr: - if i.is_dxil_op: - assert i.oload_types != "", ( - "overload for DXIL operation %s should not be empty - use void if n/a" - % (i.name) - ) - assert i.oload_types == "v" or i.oload_types.find("v") < 0, ( - "void overload should be exclusive to other types (%s)" % i.name - ) - assert ( - type(i.oload_types) is str - ), "overload for %s should be a string - use empty if n/a" % (i.name) # Verify that all operations in each class have the same signature. import itertools @@ -8391,6 +8524,7 @@ def __init__( self.template_id_idx = template_id_idx # Template ID numeric value self.component_id_idx = component_id_idx # Component ID numeric value + class db_hlsl(object): "A database of HLSL language data" diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 4580e6c12c..f0d8b0ebae 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -40,8 +40,10 @@ def get_hlsl_opcode_data(): g_hlsl_opcode_data = {} return g_hlsl_opcode_data + g_db_hlsl = None + def get_db_hlsl(): global g_db_hlsl if g_db_hlsl is None: @@ -51,6 +53,10 @@ def get_db_hlsl(): return g_db_hlsl +def get_max_oload_dims(): + return f"const unsigned kDxilMaxOloadDims = {dxil_max_overload_dims};" + + def format_comment(prefix, val): "Formats a value with a line-comment prefix." 
result = "" @@ -507,26 +513,15 @@ def print_opfunc_props(self): OP=self.OP ) ) - print( - "// OpCode OpCode name, OpCodeClass OpCodeClass name, void, h, f, d, i1, i8, i16, i32, i64, udt, obj, function attribute" - ) - # Example formatted string: - # { OC::TempRegLoad, "TempRegLoad", OCC::TempRegLoad, "tempRegLoad", false, true, true, false, true, false, true, true, false, Attribute::ReadOnly, }, - # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 - # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 last_category = None - # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong, u(dt) - f = lambda i, c: "true" if i.oload_types.find(c) >= 0 else "false" lower_exceptions = { "CBufferLoad": "cbufferLoad", "CBufferLoadLegacy": "cbufferLoadLegacy", "GSInstanceID": "gsInstanceID", } - lower_fn = ( - lambda t: lower_exceptions[t] - if t in lower_exceptions - else t[:1].lower() + t[1:] + lower_fn = lambda t: ( + lower_exceptions[t] if t in lower_exceptions else t[:1].lower() + t[1:] ) attr_dict = { "": "None", @@ -537,35 +532,47 @@ def print_opfunc_props(self): "nr": "NoReturn", "wv": "None", } - attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + "," + attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + oload_to_mask = lambda oload: sum( + [1 << dxil_all_user_oload_chars.find(c) for c in oload] + ) + oloads_fn = lambda oloads: ( + "{" + ",".join(["{0x%x}" % m for m in oloads]) + "}" + ) for i in self.instrs: if last_category != i.category: if last_category != None: print("") - print( - " // {category:118} void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute".format( - category=i.category - ) - ) + if not i.is_reserved: + print(f" // {i.category}") last_category = i.category + scalar_masks = [] + vector_masks = [] + if i.num_oloads > 0: + for n, o in enumerate(i.oload_types.split(",")): + if "<" in o: + v = o.split("<") + scalar_masks.append(oload_to_mask(v[0] + "<")) + vector_masks.append(oload_to_mask(v[1])) + else: + scalar_masks.append(oload_to_mask(o)) + vector_masks.append(0) print( - " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} {classNameQuot:28} {{{v:>6},{h:>6},{f:>6},{d:>6},{b:>6},{e:>6},{w:>6},{i:>6},{l:>6},{u:>6},{o:>6}}}, {attr:20} }},".format( + ( + " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} " + + "{classNameQuot:28} {attr:20}, {num_oloads}, " + + "{scalar_masks:16}, {vector_masks:16} }}, " + + "// Overloads: {oloads}" + ).format( name=i.name + ",", quotName='"' + i.name + '",', className=i.dxil_class + ",", classNameQuot='"' + lower_fn(i.dxil_class) + '",', - v=f(i, "v"), - h=f(i, "h"), - f=f(i, "f"), - d=f(i, "d"), - b=f(i, "1"), - e=f(i, "8"), - w=f(i, "w"), - i=f(i, "i"), - l=f(i, "l"), - u=f(i, "u"), - o=f(i, "o"), attr=attr_fn(i), + num_oloads=i.num_oloads, + scalar_masks=oloads_fn(scalar_masks), + vector_masks=oloads_fn(vector_masks), + oloads=i.oload_types, OC=self.OC, OCC=self.OCC, ) @@ -621,6 +628,9 @@ def print_opfunc_table(self): "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", "hit_object": "A(pHit);", + # Extended overload slots, extend as needed: + "$x0": "EXT(0);", + "$x1": "EXT(1);", } last_category = None for i in self.instrs: @@ -651,14 +661,24 @@ def print_opfunc_oload_type(self): obj_ty = "obj" vec_ty = "$vec" gsptr_ty = "$gsptr" + extended_ty = "$x" last_category = None 
index_dict = collections.OrderedDict() ptr_index_dict = collections.OrderedDict() single_dict = collections.OrderedDict() + # extended_dict collects overloads with multiple overload types + # grouped by the set of overload parameter indices. + extended_dict = collections.OrderedDict() struct_list = [] + extended_list = [] for instr in self.instrs: + if instr.num_oloads > 1: + # Process extended overloads separately. + extended_list.append(instr) + continue + ret_ty = instr.ops[0].llvm_type # Skip case return type is overload type if ret_ty == elt_ty: @@ -730,8 +750,7 @@ def print_opfunc_oload_type(self): "i": "IntegerType::get(Ctx, 32)", "l": "IntegerType::get(Ctx, 64)", "v": "Type::getVoidTy(Ctx)", - "u": "Type::getInt32PtrTy(Ctx)", - "o": "Type::getInt32PtrTy(Ctx)", + # No other types should be referenced here. } assert ty in type_code_texts, "llvm type %s is unknown" % (ty) ty_code = type_code_texts[ty] @@ -791,6 +810,61 @@ def print_opfunc_oload_type(self): line = line + "}" print(line) + for instr in extended_list: + # Collect indices for overloaded return and types, make a tuple of + # indices the key, and add the opcode to a list of opcodes for that + # key. Indices start with 0 for return type, and 1 for the first + # function parameter, which is the DXIL OpCode. + indices = [] + for index, op in enumerate(instr.ops): + # Skip dxil opcode. + if op.pos == 1: + continue + + op_type = op.llvm_type + if op_type.startswith(extended_ty): + try: + extended_index = int(op_type[2:]) + except: + raise ValueError( + "Error parsing extended operand type " + + f"'{op_type}' for DXIL op '{instr.name}'" + ) + if extended_index != len(indices): + raise ValueError( + f"'$x{extended_index}' is not in sequential " + + f"order for DXIL op '{instr.name}'" + ) + indices.append(op.pos) + + if len(indices) != instr.num_oloads: + raise ValueError( + f"DXIL op {instr.name}: extended overload count " + + "mismatches the number of overload types" + ) + extended_dict.setdefault(tuple(indices), []).append(instr.name) + + def get_type_at_index(index): + if index == 0: + return "FT->getReturnType()" + return f"FT->getParamType({index - 1})" + + for index_tuple, opcodes in extended_dict.items(): + line = "" + for opcode in opcodes: + line = line + f"case OpCode::{opcode}:\n" + if index_tuple[-1] > 0: + line += ( + f" if (FT->getNumParams() < {index_tuple[-1]})\n" + + " return nullptr;\n" + ) + line += ( + " return llvm::StructType::get(Ctx, {" + + ", ".join([get_type_at_index(index) for index in index_tuple]) + + "});\n" + ) + print(line) + class db_valfns_gen: "A generator of validation functions." @@ -1599,6 +1673,7 @@ def get_highest_released_shader_model(): ) return result + def get_highest_shader_model(): result = """static const unsigned kHighestMajor = %d; static const unsigned kHighestMinor = %d;""" % ( @@ -1607,6 +1682,7 @@ def get_highest_shader_model(): ) return result + def get_dxil_version_minor(): return "const unsigned kDxilMinor = %d;" % highest_minor From a13938dd6bcd08b12ef086c834c35859f050ff3f Mon Sep 17 00:00:00 2001 From: Jeff Noyle Date: Tue, 1 Apr 2025 12:55:48 -0700 Subject: [PATCH 59/88] PIX: Check for existing PIX UAV in roots sigs before adding it again (#7238) The DXR invocation counting pass calls a function to add an output UAV twice. As part of adding the UAV, any DXIL-defined rootsigs will be extended to include this new UAV. If the UAV already exists in the rootsig, we should not add it again. (Doing so results in root sig that will fail validation.) 
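In essence, the fix adds an early-out to the templated ExtendRootSig helper before it allocates the enlarged parameter array. The following is only a simplified sketch of that guard, using the names already present in PixPassHelpers.cpp (toolsRegisterSpace, toolsUAVRegister, DxilRootParameterType::UAV); the actual change appears in the diff below:

    template <typename RootSigDesc, typename RootParameterDesc>
    void ExtendRootSig(RootSigDesc &rootSigDesc) {
      // If a previous call already appended the PIX output UAV at
      // (toolsRegisterSpace, toolsUAVRegister), do not append it again.
      for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) {
        const auto &Param = rootSigDesc.pParameters[i];
        if (Param.ParameterType == DxilRootParameterType::UAV &&
            Param.Descriptor.RegisterSpace == toolsRegisterSpace &&
            Param.Descriptor.ShaderRegister == toolsUAVRegister)
          return; // Root signature already contains the tools UAV.
      }
      // Otherwise allocate NumParameters + 1 entries and append the new
      // UAV parameter as before.
    }
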
Note: the test is not a file-check style because dxil-defined subobjects don't get rehydrated into the DxilModule when the output of dxc.exe is piped into the input of opt.exe, meaning that the broken case can't be exercised. --- lib/DxilPIXPasses/PixPassHelpers.cpp | 12 +++++++ tools/clang/unittests/HLSL/PixTest.cpp | 40 ++++++++++++++++++++- tools/clang/unittests/HLSL/PixTestUtils.cpp | 2 +- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index dfb4b3aa83..69385ae048 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -199,6 +199,18 @@ constexpr uint32_t toolsUAVRegister = 0; template void ExtendRootSig(RootSigDesc &rootSigDesc) { auto *existingParams = rootSigDesc.pParameters; + for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) { + if (rootSigDesc.pParameters[i].ParameterType == + DxilRootParameterType::UAV) { + if (rootSigDesc.pParameters[i].Descriptor.RegisterSpace == + toolsRegisterSpace && + rootSigDesc.pParameters[i].Descriptor.ShaderRegister == + toolsUAVRegister) { + // Already added + return; + } + } + } auto *newParams = new RootParameterDesc[rootSigDesc.NumParameters + 1]; if (existingParams != nullptr) { memcpy(newParams, existingParams, diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index bb81c1c953..b97aa70c05 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -146,6 +146,7 @@ class PixTest : public ::testing::Test { TEST_METHOD(RootSignatureUpgrade_Annotation) TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest) + TEST_METHOD(DxilPIXDXRInvocationsLog_EmbeddedRootSigs) TEST_METHOD(DebugInstrumentation_TextOutput) TEST_METHOD(DebugInstrumentation_BlockReport) @@ -660,7 +661,7 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { CComPtr pOptimizedModule; CComPtr pText; VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( - dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + blob, Options.data(), Options.size(), &pOptimizedModule, &pText)); std::string outputText; if (pText->GetBufferSize() != 0) { @@ -2945,6 +2946,43 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +TEST_F(PixTest, DxilPIXDXRInvocationsLog_EmbeddedRootSigs) { + + const char *source = R"x( + +GlobalRootSignature grs = {"CBV(b0)"}; +struct MyPayload +{ + float4 color; +}; + +[shader("raygeneration")] +void MyRayGen() +{ +} + +[shader("closesthit")] +void MyClosestHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("anyhit")] +void MyAnyHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("miss")] +void MyMiss(inout MyPayload payload) +{ +} + +)x"; + + auto compiledLib = Compile(m_dllSupport, source, L"lib_6_3", + {L"-Qstrip_reflect"}, L"RootSig"); + RunDxilPIXDXRInvocationsLog(compiledLib); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/tools/clang/unittests/HLSL/PixTestUtils.cpp b/tools/clang/unittests/HLSL/PixTestUtils.cpp index 91b6c4479c..61647ff5fa 100644 --- a/tools/clang/unittests/HLSL/PixTestUtils.cpp +++ b/tools/clang/unittests/HLSL/PixTestUtils.cpp @@ -397,7 +397,7 @@ CComPtr Compile(dxc::DxcDllSupport &dllSupport, const char *hlsl, CheckOperationSucceeded(pResult, &pProgram); CComPtr pLib; - VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); + 
VERIFY_SUCCEEDED(dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike( pProgram->GetBufferPointer(), pProgram->GetBufferSize()); VERIFY_IS_NOT_NULL(pContainer); From 2f357a9d625eaaa982ce1fac513e5f77a7d81900 Mon Sep 17 00:00:00 2001 From: Antonio Maiorano Date: Tue, 1 Apr 2025 21:38:26 -0400 Subject: [PATCH 60/88] Fix assert due to unreachable discard (#7289) When emitting discard in an unreachable code context (e.g. after an infinite loop), DXC would assert (if asserts enabled), or trigger a UBSAN failure because the discard instruction would have no parent. When an infinite loop is emitted during CodeGen, the InsertPt is cleared, thus subsequent discard instructions would be created, but no parent set. We skip emitting discard in this case, which follows the same pattern as is done for EmitIfStmt, and EmitSwitchStmt. --- tools/clang/lib/CodeGen/CGStmt.cpp | 4 ++++ .../FinishCodeGen/unreachable-discard.hlsl | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 080d824022..340550dbdd 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -525,6 +525,10 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { // HLSL Change Begins. void CodeGenFunction::EmitDiscardStmt(const DiscardStmt &S) { + // Skip unreachable discard. + if (!HaveInsertPoint()) + return; + CGM.getHLSLRuntime().EmitHLSLDiscard(*this); } // HLSL Change Ends. diff --git a/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl new file mode 100644 index 0000000000..77c0f51911 --- /dev/null +++ b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc /T ps_6_5 -fcgl %s | FileCheck %s + +// Compiling this HLSL would trigger an assertion: +// While deleting: void (i32, float)* %dx.hl.op..void (i32, float) +// Use still stuck around after Def is destroyed: call void @"dx.hl.op..void (i32, float)"(i32 120, float -1.000000e+00), !dbg <0x503000001cc8> +// Error: assert(use_empty() && "Uses remain when a value is destroyed!") +// File: /src/external/DirectXShaderCompiler/lib/IR/Value.cpp(83) +// +// Bug was fixed in CodeGenFunction::EmitDiscardStmt by skipping the emission of +// an unreachable discard. + +// CHECK: define void @main() +// CHECK: br label % +// CHECK-NOT: call void @"dx.hl.op..void (i32, float)" +// CHECK: ret void + +void main() { + while (true) { + } + discard; +} From 572aef579dc90cb8de5df254ed3e7225c2c8a30e Mon Sep 17 00:00:00 2001 From: Chris B Date: Tue, 1 Apr 2025 22:18:50 -0500 Subject: [PATCH 61/88] Disable code owners in main (#7298) MS just changed policy to enforce code owners across the whole enterprise, which is _not_ what we want. So we need to disable this in main for the time being. 
--- .github/CODEOWNERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 01ad1577b7..6cbdeb6270 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,2 @@ -* @microsoft/hlsl-release +# Uncomment the next line in release branches after ask-mode begins +# * @microsoft/hlsl-release From 9eb71198c9425ee77178e081e5188659ee2cf02c Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Wed, 2 Apr 2025 05:04:38 -0600 Subject: [PATCH 62/88] [SPIRV] Implements vk::BufferPointer proposal (#7163) Implements [vk::BufferPointer proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0010-vk-buffer-ref.md). Closes #6489. --- include/dxc/HlslIntrinsicOp.h | 5 +- include/dxc/dxcapi.internal.h | 8 +- lib/HLSL/HLOperationLower.cpp | 9 ++ tools/clang/include/clang/AST/HlslTypes.h | 33 ++++- .../clang/include/clang/AST/OperationKinds.h | 5 + tools/clang/include/clang/Basic/Attr.td | 17 +++ .../clang/Basic/DiagnosticSemaKinds.td | 9 +- .../clang/include/clang/SPIRV/SpirvBuilder.h | 11 ++ .../clang/include/clang/SPIRV/SpirvContext.h | 12 ++ .../include/clang/SPIRV/SpirvInstruction.h | 52 +++++++ tools/clang/include/clang/SPIRV/SpirvType.h | 24 ++++ .../clang/include/clang/SPIRV/SpirvVisitor.h | 6 + tools/clang/lib/AST/ASTContextHLSL.cpp | 79 +++++++++++ tools/clang/lib/AST/Expr.cpp | 9 +- tools/clang/lib/AST/ExprConstant.cpp | 9 ++ tools/clang/lib/AST/HlslTypes.cpp | 47 +++++++ tools/clang/lib/Lex/PPMacroExpansion.cpp | 7 +- .../lib/SPIRV/AlignmentSizeCalculator.cpp | 25 ++-- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 9 +- tools/clang/lib/SPIRV/EmitVisitor.cpp | 44 +++++- tools/clang/lib/SPIRV/EmitVisitor.h | 25 ++-- tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 33 ++++- tools/clang/lib/SPIRV/LowerTypeVisitor.h | 4 + tools/clang/lib/SPIRV/SpirvBuilder.cpp | 37 +++++ tools/clang/lib/SPIRV/SpirvContext.cpp | 26 ++++ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 133 +++++++++++++++++- tools/clang/lib/SPIRV/SpirvEmitter.h | 13 ++ tools/clang/lib/SPIRV/SpirvInstruction.cpp | 28 ++++ tools/clang/lib/Sema/SemaCast.cpp | 17 +++ tools/clang/lib/Sema/SemaExprCXX.cpp | 28 ++++ tools/clang/lib/Sema/SemaHLSL.cpp | 111 ++++++++++++++- .../vk.buffer-pointer.alias.cs.hlsl | 28 ++++ .../CodeGenSPIRV/vk.buffer-pointer.alias.hlsl | 72 ++++++++++ .../vk.buffer-pointer.atomic.hlsl | 39 +++++ .../vk.buffer-pointer.error1.hlsl | 19 +++ .../vk.buffer-pointer.error2.hlsl | 19 +++ .../vk.buffer-pointer.error3.hlsl | 19 +++ .../vk.buffer-pointer.error4.hlsl | 18 +++ .../vk.buffer-pointer.error5.hlsl | 26 ++++ .../vk.buffer-pointer.error6.hlsl | 23 +++ .../vk.buffer-pointer.linked-list.hlsl | 101 +++++++++++++ .../CodeGenSPIRV/vk.buffer-pointer.read.hlsl | 48 +++++++ .../CodeGenSPIRV/vk.buffer-pointer.write.hlsl | 52 +++++++ utils/hct/gen_intrin_main.txt | 10 +- utils/hct/hctdb.py | 12 +- utils/hct/hlsl_intrinsic_opcodes.json | 7 +- 46 files changed, 1326 insertions(+), 42 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl create mode 100644 
tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 90f3fafd79..68b88822e8 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -231,6 +231,9 @@ enum class IntrinsicOp { IOP_VkReadClock = 223, IOP_Vkext_execution_mode = 224, IOP_Vkext_execution_mode_id = 225, + IOP_Vkreinterpret_pointer_cast = 360, + IOP_Vkstatic_pointer_cast = 361, + MOP_GetBufferContents = 362, MOP_Append = 226, MOP_RestartStrip = 227, MOP_CalculateLevelOfDetail = 228, @@ -366,7 +369,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 360, + Num_Intrinsics = 363, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index bf8a040673..f183bb6cf0 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,6 +7,9 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ @@ -35,6 +38,7 @@ typedef struct ID3D10Blob ID3D10Blob; static const BYTE INTRIN_TEMPLATE_FROM_TYPE = 0xff; static const BYTE INTRIN_TEMPLATE_VARARGS = 0xfe; static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION = 0xfd; +static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION_2 = 0xfc; // Use this enumeration to describe allowed templates (layouts) in intrinsics. enum LEGAL_INTRINSIC_TEMPLATES { @@ -128,7 +132,9 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, - LICOMPTYPE_COUNT = 52 + LICOMPTYPE_VK_BUFFER_POINTER = 52, + + LICOMPTYPE_COUNT = 53 }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 3ab1f9fdec..445dbcc879 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,6 +7,9 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. 
// +// // /////////////////////////////////////////////////////////////////////////////// #define _USE_MATH_DEFINES @@ -6818,6 +6821,12 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil // opcode + {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, }; } // namespace static_assert( diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 3b517576fe..ab29e4bde7 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /// /// \file // /// \brief Defines the HLSL type system interface. // @@ -31,6 +34,7 @@ namespace clang { class ASTContext; class AttributeList; +class CXXConstructorDecl; class CXXMethodDecl; class CXXRecordDecl; class ClassTemplateDecl; @@ -402,6 +406,10 @@ DeclareNodeOrRecordType(clang::ASTContext &Ctx, DXIL::NodeIOKind Type, bool IsCompleteType = false); #ifdef ENABLE_SPIRV_CODEGEN +clang::CXXRecordDecl * +DeclareVkBufferPointerType(clang::ASTContext &context, + clang::DeclContext *declContext); + clang::CXXRecordDecl *DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, @@ -427,7 +435,7 @@ clang::VarDecl *DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, /// method. AST context in which to /// work. Class in which the function template /// is declared. Function for which a -/// template is created. Declarations for templates to the /// function. Count of /// template declarations. A new function template declaration @@ -533,6 +541,29 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, clang::QualType paramType); bool IsPatchConstantFunctionDecl(const clang::FunctionDecl *FD); +#ifdef ENABLE_SPIRV_CODEGEN +bool IsVKBufferPointerType(clang::QualType type); +clang::QualType GetVKBufferPointerBufferType(clang::QualType type); +unsigned GetVKBufferPointerAlignment(clang::QualType type); +#endif + +/// Adds a constructor declaration to the specified class +/// record. ASTContext that owns +/// declarations. Record declaration in which +/// to add constructor. Result type for +/// constructor. Types for constructor +/// parameters. Names for constructor +/// parameters. Name for +/// constructor. Whether the constructor is a +/// const function. The method declaration for the +/// constructor. +clang::CXXConstructorDecl *CreateConstructorDeclarationWithParams( + clang::ASTContext &context, clang::CXXRecordDecl *recordDecl, + clang::QualType resultType, llvm::ArrayRef paramTypes, + llvm::ArrayRef paramNames, + clang::DeclarationName declarationName, bool isConst, + bool isTemplateFunction = false); + /// Adds a function declaration to the specified class /// record. ASTContext that owns /// declarations. 
Record declaration in which diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 75e665a5e9..3909c8b5e8 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be @@ -321,6 +324,8 @@ enum CastKind { CK_HLSLCC_FloatingToIntegral, CK_HLSLCC_FloatingToBoolean, CK_HLSLCC_FloatingCast, + CK_VK_BufferPointerToIntegral, + CK_VK_IntegralToBufferPointer, // HLSL Change - Made CK_Invalid an enum case because otherwise it is UB to // assign it to a value of CastKind. diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 7a009aa7e1..9c117fb3ce 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// class DocumentationCategory { @@ -1447,6 +1450,20 @@ def VKStorageClassExt : InheritableAttr { let Documentation = [Undocumented]; } +def VKBufferPointer : InheritableAttr { + let Spellings = [CXX11<"", "hlsl_vk_buffer_pointer", 2021>]; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + +def VKAliasedPointer : InheritableAttr { + let Spellings = [CXX11<"vk", "aliased_pointer">]; + let Subjects = SubjectList<[Var, ParmVar], ErrorDiag>; + let Args = []; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + // Global variables that are of struct type def StructGlobalVar : SubsetSubjecthasGlobalStorage() && S->getType()->isStructureType()}]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6ae59cac14..4f4dc28a4c 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -7838,7 +7841,7 @@ def warn_hlsl_intrinsic_in_wrong_shader_model : Warning< "intrinsic %0 potentially used by '%1' requires shader model %2 or greater">, DefaultError, InGroup; def warn_hlsl_intrinsic_overload_in_wrong_shader_model : Warning< - "overload of intrinsic %0 requires shader model %1 or greater">, + "overload of intrinsic %0 requires shader model %1 or greater">, DefaultError, InGroup; def err_hlsl_intrinsic_template_arg_unsupported: Error< "Explicit template arguments on intrinsic %0 are not supported">; @@ -8004,6 +8007,10 @@ def err_hlsl_hitobject_unsupported_stage : Error< // SPIRV Change Starts def err_hlsl_vulkan_specific_feature: Error<"%0 is a Vulkan specific feature">; +def err_hlsl_vk_pointer_cast_alignment: Error< + "Vulkan buffer pointer cannot be cast to greater alignment">; +def err_hlsl_vk_static_pointer_cast_type: Error< + "vk::static_pointer_cast() content type must be base class of argument's content type">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index f03735115b..ed2cb3b6fd 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H @@ -273,6 +276,14 @@ class SpirvBuilder { SpirvInstruction *sample, SourceLocation); + /// \brief Creates an OpConverPtrToU SPIR-V instruction with the given + /// parameters. + SpirvConvertPtrToU *createConvertPtrToU(SpirvInstruction *ptr, QualType type); + + /// \brief Creates an OpConverUToPtr SPIR-V instruction with the given + /// parameters. + SpirvConvertUToPtr *createConvertUToPtr(SpirvInstruction *val, QualType type); + /// \brief Creates SPIR-V instructions for sampling the given image. /// /// If compareVal is given a non-zero value, *Dref* variants of OpImageSample* diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index e65097bedb..c18c139642 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H @@ -317,6 +320,13 @@ class SpirvContext { const HybridPointerType *getPointerType(QualType pointee, spv::StorageClass); + const ForwardPointerType *getForwardPointerType(QualType pointee); + + const SpirvPointerType *getForwardReference(QualType type); + + void registerForwardReference(QualType type, + const SpirvPointerType *pointerType); + /// Generates (or reuses an existing) OpString for the given string literal. 
SpirvString *getSpirvString(llvm::StringRef str); @@ -478,6 +488,8 @@ class SpirvContext { llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; llvm::SmallVector hybridPointerTypes; + llvm::MapVector forwardPointerTypes; + llvm::MapVector forwardReferences; llvm::DenseSet functionTypes; llvm::DenseMap spirvIntrinsicTypesById; llvm::SmallVector spirvIntrinsicTypes; diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7ec1375bde..7a7ad3aa4d 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H @@ -67,6 +71,10 @@ class SpirvInstruction { IK_ConstantComposite, IK_ConstantNull, + // Pointer <-> uint conversions. + IK_ConvertPtrToU, + IK_ConvertUToPtr, + // OpUndef IK_Undef, @@ -1306,6 +1314,50 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConvertPtrToU : public SpirvInstruction { +public: + SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertPtrToU) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertPtrToU; + } + + bool operator==(const SpirvConvertPtrToU &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPtr() const { return ptr; } + +private: + SpirvInstruction *ptr; +}; + +class SpirvConvertUToPtr : public SpirvInstruction { +public: + SpirvConvertUToPtr(SpirvInstruction *intValue, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertUToPtr) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertUToPtr; + } + + bool operator==(const SpirvConvertUToPtr &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getVal() const { return val; } + +private: + SpirvInstruction *val; +}; + class SpirvUndef : public SpirvInstruction { public: SpirvUndef(QualType type); diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 221f01e5ff..00a00ef238 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H @@ -53,6 +56,7 @@ class SpirvType { TK_RuntimeArray, TK_Struct, TK_Pointer, + TK_ForwardPointer, TK_Function, TK_AccelerationStructureNV, TK_RayQueryKHR, @@ -387,6 +391,26 @@ class SpirvPointerType : public SpirvType { spv::StorageClass storageClass; }; +/// Represents a SPIR-V forwarding pointer type. 
+class ForwardPointerType : public SpirvType { +public: + ForwardPointerType(QualType pointee) + : SpirvType(TK_ForwardPointer), pointeeType(pointee) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_ForwardPointer; + } + + const QualType getPointeeType() const { return pointeeType; } + + bool operator==(const ForwardPointerType &that) const { + return pointeeType == that.pointeeType; + } + +private: + const QualType pointeeType; +}; + /// Represents a SPIR-V function type. None of the parameters nor the return /// type is allowed to be a hybrid type. class FunctionType : public SpirvType { diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 303a4600a1..93682518a1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H @@ -89,6 +93,8 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) DEFINE_VISIT_METHOD(SpirvConstantNull) + DEFINE_VISIT_METHOD(SpirvConvertPtrToU) + DEFINE_VISIT_METHOD(SpirvConvertUToPtr) DEFINE_VISIT_METHOD(SpirvUndef) DEFINE_VISIT_METHOD(SpirvCompositeConstruct) DEFINE_VISIT_METHOD(SpirvCompositeExtract) diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 3748f8f8f8..c7a031a219 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the ASTContext interface for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -1072,6 +1075,47 @@ static void CreateConstructorDeclaration( (*constructorDecl)->setAccess(AccessSpecifier::AS_public); } +CXXConstructorDecl *hlsl::CreateConstructorDeclarationWithParams( + ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, + ArrayRef paramTypes, ArrayRef paramNames, + DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DXASSERT_NOMSG(recordDecl != nullptr); + DXASSERT_NOMSG(!resultType.isNull()); + DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); + + TypeSourceInfo *tinfo; + CXXConstructorDecl *constructorDecl; + CreateConstructorDeclaration(context, recordDecl, resultType, paramTypes, + declarationName, isConst, &constructorDecl, + &tinfo); + + // Create and associate parameters to constructor. 
+ SmallVector parmVarDecls; + if (!paramTypes.empty()) { + for (unsigned int i = 0; i < paramTypes.size(); ++i) { + IdentifierInfo *argIi = &context.Idents.get(paramNames[i]); + ParmVarDecl *parmVarDecl = ParmVarDecl::Create( + context, constructorDecl, NoLoc, NoLoc, argIi, paramTypes[i], + context.getTrivialTypeSourceInfo(paramTypes[i], NoLoc), + StorageClass::SC_None, nullptr); + parmVarDecl->setScopeInfo(0, i); + DXASSERT(parmVarDecl->getFunctionScopeIndex() == i, + "otherwise failed to set correct index"); + parmVarDecls.push_back(parmVarDecl); + } + constructorDecl->setParams(ArrayRef(parmVarDecls)); + AssociateParametersToFunctionPrototype(tinfo, &parmVarDecls.front(), + parmVarDecls.size()); + } + + // If this is going to be part of a template function decl, don't add it to + // the record because the template function decl will be added instead. + if (!isTemplateFunction) + recordDecl->addDecl(constructorDecl); + + return constructorDecl; +} + static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, @@ -1324,6 +1368,41 @@ CXXRecordDecl *hlsl::DeclareNodeOrRecordType( } #ifdef ENABLE_SPIRV_CODEGEN +CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, + DeclContext *declContext) { + BuiltinTypeDeclBuilder Builder(declContext, "BufferPointer", + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *TyParamDecl = + Builder.addTypeTemplateParam("recordtype"); + Builder.addIntegerTemplateParam("alignment", context.UnsignedIntTy, 0); + + Builder.startDefinition(); + + QualType paramType = QualType(TyParamDecl->getTypeForDecl(), 0); + CXXRecordDecl *recordDecl = Builder.getRecordDecl(); + + CXXMethodDecl *methodDecl = CreateObjectFunctionDeclarationWithParams( + context, recordDecl, context.getLValueReferenceType(paramType), {}, {}, + DeclarationName(&context.Idents.get("Get")), true); + CanQualType canQualType = + recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, + {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false); + + StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); + unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); + methodDecl->addAttr( + HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); + methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + + return Builder.completeDefinition(); +} + CXXRecordDecl *hlsl::DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index 0e2ec8c6c2..c6dc21217e 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. 
@@ -1716,7 +1719,11 @@ const char *CastExpr::getCastKindName() const { return "HLSLCC_FloatingToBoolean"; case CK_HLSLCC_FloatingCast: return "HLSLCC_FloatingCast"; - // HLSL Change Ends + case CK_VK_BufferPointerToIntegral: + return "VK_BufferPointerToIntegral"; + case CK_VK_IntegralToBufferPointer: + return "VK_IntegralToBufferPointer"; + // HLSL Change Ends } llvm_unreachable("Unhandled cast kind!"); diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 5e8d4700bd..69e0760bce 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. @@ -7829,6 +7832,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return false; return Success(Value, E); } + + // HLSL Change Starts + case CK_VK_BufferPointerToIntegral: { + return false; + // HLSL Change Ends + } } llvm_unreachable("unknown cast resulting in integral value"); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index eaf8273413..d853125954 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,6 +5,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. // // /// /// \file // @@ -734,6 +737,50 @@ bool IsHLSLRayQueryType(clang::QualType type) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static llvm::Optional> +MaybeGetVKBufferPointerParams(clang::QualType type) { + const RecordType *RT = dyn_cast(type.getCanonicalType()); + if (!RT) + return llvm::None; + + const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getAsCXXRecordDecl()); + if (!templateDecl || !templateDecl->getName().equals("BufferPointer")) + return llvm::None; + + auto *namespaceDecl = + dyn_cast_or_null(templateDecl->getDeclContext()); + if (!namespaceDecl || !namespaceDecl->getName().equals("vk")) + return llvm::None; + + const TemplateArgumentList &argList = templateDecl->getTemplateArgs(); + QualType bufferType = argList[0].getAsType(); + unsigned align = + argList.size() > 1 ? 
argList[1].getAsIntegral().getLimitedValue() : 0; + return std::make_pair(bufferType, align); +} + +bool IsVKBufferPointerType(clang::QualType type) { + return MaybeGetVKBufferPointerParams(type).hasValue(); +} + +QualType GetVKBufferPointerBufferType(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert(bpParams.hasValue() && + "cannot get pointer type for type that is not a vk::BufferPointer"); + return bpParams.getValue().first; +} + +unsigned GetVKBufferPointerAlignment(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert( + bpParams.hasValue() && + "cannot get pointer alignment for type that is not a vk::BufferPointer"); + return bpParams.getValue().second; +} +#endif + QualType GetHLSLResourceResultType(QualType type) { // Don't canonicalize the type as to not lose snorm in Buffer const RecordType *RT = type->getAs(); diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index 64ce8c9182..ebfb93df2e 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the @@ -1080,7 +1083,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("nullability", true) .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory)) .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread)) - .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow)) + .Case("dataflow_sanitizer", + LangOpts.Sanitize.has(SanitizerKind::DataFlow)) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? .Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -1180,6 +1184,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("has_trivial_constructor", LangOpts.CPlusPlus) .Case("has_trivial_destructor", LangOpts.CPlusPlus) .Case("has_virtual_destructor", LangOpts.CPlusPlus) + .Case("hlsl_vk_buffer_pointer", LangOpts.SPIRV) .Case("is_abstract", LangOpts.CPlusPlus) .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index 492640c493..db140f4766 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" @@ -277,14 +280,20 @@ std::pair AlignmentSizeCalculator::getAlignmentAndSize( if (recordType != nullptr) { const llvm::StringRef name = recordType->getDecl()->getName(); - if (isTypeInVkNamespace(recordType) && name == "SpirvType") { - const ClassTemplateSpecializationDecl *templateDecl = - cast(recordType->getDecl()); - const uint64_t size = - templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); - const uint64_t alignment = - templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); - return {alignment, size}; + if (isTypeInVkNamespace(recordType)) { + if (name == "BufferPointer") { + return {8, 8}; // same as uint64_t + } + + if (name == "SpirvType") { + const ClassTemplateSpecializationDecl *templateDecl = + cast(recordType->getDecl()); + const uint64_t size = + templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); + const uint64_t alignment = + templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); + return {alignment, size}; + } } } diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index c2b5acff53..6fd0c6d950 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" @@ -200,8 +203,10 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } // Pointer type else if (const auto *ptrType = dyn_cast(type)) { - addCapabilityForType(ptrType->getPointeeType(), loc, sc); - if (sc == spv::StorageClass::PhysicalStorageBuffer) { + addCapabilityForType(ptrType->getPointeeType(), loc, + ptrType->getStorageClass()); + if (ptrType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer) { addExtension(Extension::KHR_physical_storage_buffer, "SPV_KHR_physical_storage_buffer", loc); addCapability(spv::Capability::PhysicalStorageBufferAddresses); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 6f6f5f88cd..9c0368f7a1 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. 
@@ -488,6 +491,7 @@ std::vector EmitVisitor::takeBinary() { debugVariableBinary.end()); result.insert(result.end(), annotationsBinary.begin(), annotationsBinary.end()); + result.insert(result.end(), fwdDeclBinary.begin(), fwdDeclBinary.end()); result.insert(result.end(), typeConstantBinary.begin(), typeConstantBinary.end()); result.insert(result.end(), globalVarsBinary.begin(), globalVarsBinary.end()); @@ -1016,6 +1020,28 @@ bool EmitVisitor::visit(SpirvConstantNull *inst) { return true; } +bool EmitVisitor::visit(SpirvConvertPtrToU *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPtr())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvConvertUToPtr *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getVal())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvUndef *inst) { typeHandler.getOrCreateUndef(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -2012,10 +2038,11 @@ void EmitTypeHandler::initTypeInstruction(spv::Op op) { curTypeInst.push_back(static_cast(op)); } -void EmitTypeHandler::finalizeTypeInstruction() { +void EmitTypeHandler::finalizeTypeInstruction(bool isFwdDecl) { curTypeInst[0] |= static_cast(curTypeInst.size()) << 16; - typeConstantBinary->insert(typeConstantBinary->end(), curTypeInst.begin(), - curTypeInst.end()); + auto binarySection = isFwdDecl ? fwdDeclBinary : typeConstantBinary; + binarySection->insert(binarySection->end(), curTypeInst.begin(), + curTypeInst.end()); } uint32_t EmitTypeHandler::getResultIdForType(const SpirvType *type, @@ -2594,6 +2621,17 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { curTypeInst.push_back(pointeeType); finalizeTypeInstruction(); } + // Forward pointer types + else if (const auto *fwdPtrType = dyn_cast(type)) { + const SpirvPointerType *ptrType = + context.getForwardReference(fwdPtrType->getPointeeType()); + const uint32_t refId = emitType(ptrType); + initTypeInstruction(spv::Op::OpTypeForwardPointer); + curTypeInst.push_back(refId); + curTypeInst.push_back(static_cast(ptrType->getStorageClass())); + finalizeTypeInstruction(true); + return refId; + } // Function types else if (const auto *fnType = dyn_cast(type)) { const uint32_t retTypeId = emitType(fnType->getReturnType()); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 2f5d99b89d..1f9b0939e6 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H @@ -49,15 +53,15 @@ class EmitTypeHandler { EmitTypeHandler(ASTContext &astCtx, SpirvContext &spvContext, const SpirvCodeGenOptions &opts, FeatureManager &featureMgr, std::vector *debugVec, - std::vector *decVec, + std::vector *decVec, std::vector *fwdVec, std::vector *typesVec, const std::function &takeNextIdFn) : astContext(astCtx), context(spvContext), featureManager(featureMgr), debugVariableBinary(debugVec), annotationsBinary(decVec), - typeConstantBinary(typesVec), takeNextIdFunction(takeNextIdFn), - emittedConstantInts({}), emittedConstantFloats({}), - emittedConstantComposites({}), emittedConstantNulls({}), - emittedUndef({}), emittedConstantBools() { + fwdDeclBinary(fwdVec), typeConstantBinary(typesVec), + takeNextIdFunction(takeNextIdFn), emittedConstantInts({}), + emittedConstantFloats({}), emittedConstantComposites({}), + emittedConstantNulls({}), emittedUndef({}), emittedConstantBools() { assert(decVec); assert(typesVec); } @@ -120,7 +124,7 @@ class EmitTypeHandler { private: void initTypeInstruction(spv::Op op); - void finalizeTypeInstruction(); + void finalizeTypeInstruction(bool isFwdDecl = false); // Returns the result-id for the given type and decorations. If a type with // the same decorations have already been used, it returns the existing @@ -161,6 +165,7 @@ class EmitTypeHandler { std::vector curDecorationInst; std::vector *debugVariableBinary; std::vector *annotationsBinary; + std::vector *fwdDeclBinary; std::vector *typeConstantBinary; std::function takeNextIdFunction; @@ -207,7 +212,7 @@ class EmitVisitor : public Visitor { : Visitor(opts, spvCtx), astContext(astCtx), featureManager(featureMgr), id(0), typeHandler(astCtx, spvCtx, opts, featureMgr, &debugVariableBinary, - &annotationsBinary, &typeConstantBinary, + &annotationsBinary, &fwdDeclBinary, &typeConstantBinary, [this]() -> uint32_t { return takeNextId(); }), debugMainFileId(0), debugInfoExtInstId(0), debugLineStart(0), debugLineEnd(0), debugColumnStart(0), debugColumnEnd(0), @@ -254,6 +259,8 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; bool visit(SpirvConstantNull *) override; + bool visit(SpirvConvertPtrToU *) override; + bool visit(SpirvConvertUToPtr *) override; bool visit(SpirvUndef *) override; bool visit(SpirvCompositeConstruct *) override; bool visit(SpirvCompositeExtract *) override; @@ -438,7 +445,9 @@ class EmitVisitor : public Visitor { // All annotation instructions: OpDecorate, OpMemberDecorate, OpGroupDecorate, // OpGroupMemberDecorate, and OpDecorationGroup. std::vector annotationsBinary; - // All type and constant instructions + // All forward pointer type declaration instructions + std::vector fwdDeclBinary; + // All other type and constant instructions std::vector typeConstantBinary; // All global variable declarations (all OpVariable instructions whose Storage // Class is not Function) diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index a5bc4a4aa8..b31d19b5d8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" @@ -549,7 +552,9 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type, // checking the general struct type. if (const auto *spvType = lowerResourceType(type, rule, isRowMajor, srcLoc)) { - spvContext.registerStructDeclForSpirvType(spvType, decl); + if (!isa(spvType)) { + spvContext.registerStructDeclForSpirvType(spvType, decl); + } return spvType; } @@ -809,6 +814,32 @@ const SpirvType *LowerTypeVisitor::lowerVkTypeInVkNamespace( QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); return lowerType(realType, rule, llvm::None, srcLoc); } + if (name == "BufferPointer") { + const size_t visitedTypeStackSize = visitedTypeStack.size(); + (void)visitedTypeStackSize; // suppress unused warning (used only in assert) + + for (QualType t : visitedTypeStack) { + if (t == type) { + return spvContext.getForwardPointerType(type); + } + } + + QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); + if (rule == SpirvLayoutRule::Void) { + rule = spvOptions.sBufferLayoutRule; + } + visitedTypeStack.push_back(type); + + const SpirvType *spirvType = lowerType(realType, rule, llvm::None, srcLoc); + const auto *pointerType = spvContext.getPointerType( + spirvType, spv::StorageClass::PhysicalStorageBuffer); + spvContext.registerForwardReference(type, pointerType); + + assert(visitedTypeStack.back() == type); + visitedTypeStack.pop_back(); + assert(visitedTypeStack.size() == visitedTypeStackSize); + return pointerType; + } emitError("unknown type %0 in vk namespace", srcLoc) << type; return nullptr; } diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 96235d1508..5b26b67e3a 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H @@ -137,6 +140,7 @@ class LowerTypeVisitor : public Visitor { AlignmentSizeCalculator alignmentCalc; /// alignment calculator bool useArrayForMat1xN; /// SPIR-V array for HLSL Matrix 1xN SpirvBuilder &spvBuilder; + SmallVector visitedTypeStack; // for type recursion detection }; } // end namespace spirv diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 1275e2b252..6b3f43fc77 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" @@ -202,6 +205,14 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setLayoutRule(pointer->getLayoutRule()); instruction->setRValue(true); + if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + instruction->setAlignment(align); + } + if (pointer->containsAliasComponent() && isAKindOfStructuredOrByteBuffer(resultType)) { instruction->setStorageClass(spv::StorageClass::Uniform); @@ -300,6 +311,16 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, new (context) SpirvStore(loc, address, source, llvm::None, range); insertPoint->addInstruction(instruction); + if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && + address->getAstResultType() != QualType()) { // exclude raw buffer + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + address->getAstResultType(), address->getLayoutRule(), llvm::None, + &stride); + instruction->setAlignment(align); + } + if (address->isRasterizerOrdered()) { createEndInvocationInterlockEXT(loc, range); } @@ -491,6 +512,22 @@ SpirvImageTexelPointer *SpirvBuilder::createImageTexelPointer( return instruction; } +SpirvConvertPtrToU *SpirvBuilder::createConvertPtrToU(SpirvInstruction *ptr, + QualType type) { + auto *instruction = new (context) SpirvConvertPtrToU(ptr, type); + instruction->setRValue(true); + insertPoint->addInstruction(instruction); + return instruction; +} + +SpirvConvertUToPtr *SpirvBuilder::createConvertUToPtr(SpirvInstruction *val, + QualType type) { + auto *instruction = new (context) SpirvConvertUToPtr(val, type); + instruction->setRValue(false); + insertPoint->addInstruction(instruction); + return instruction; +} + spv::ImageOperandsMask SpirvBuilder::composeImageOperandsMask( SpirvInstruction *bias, SpirvInstruction *lod, const std::pair &grad, diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 6af36eb691..47dfc67433 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include @@ -328,6 +331,29 @@ const HybridPointerType *SpirvContext::getPointerType(QualType pointee, return result; } +const ForwardPointerType * +SpirvContext::getForwardPointerType(QualType pointee) { + assert(hlsl::IsVKBufferPointerType(pointee)); + + auto foundPointee = forwardPointerTypes.find(pointee); + if (foundPointee != forwardPointerTypes.end()) { + return foundPointee->second; + } + + return forwardPointerTypes[pointee] = new (this) ForwardPointerType(pointee); +} + +const SpirvPointerType *SpirvContext::getForwardReference(QualType type) { + return forwardReferences[type]; +} + +void SpirvContext::registerForwardReference( + QualType type, const SpirvPointerType *pointerType) { + assert(pointerType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer); + forwardReferences[type] = pointerType; +} + FunctionType * SpirvContext::getFunctionType(const SpirvType *ret, llvm::ArrayRef param) { diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 579af04ea6..7cc84fa2fc 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits @@ -1233,12 +1237,17 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, } else if (isa(expr)) { assert(curThis); result = curThis; - } else if (isa(expr)) { + } else if (const auto *constructExpr = dyn_cast(expr)) { // For RayQuery type, we should not explicitly initialize it using // CXXConstructExpr e.g., RayQuery<0> r = RayQuery<0>() is the same as we do // not have a variable initialization. Setting nullptr for the SPIR-V // instruction used for expr will let us skip the variable initialization. - if (!hlsl::IsHLSLRayQueryType(expr->getType())) + if (hlsl::IsVKBufferPointerType(expr->getType())) { + const Expr *arg = constructExpr->getArg(0); + SpirvInstruction *value = loadIfGLValue(arg, arg->getSourceRange()); + result = spvBuilder.createConvertUToPtr(value, expr->getType()); + result->setRValue(); + } else if (!hlsl::IsHLSLRayQueryType(expr->getType())) result = curThis; } else if (const auto *unaryExpr = dyn_cast(expr)) { result = doUnaryExprOrTypeTraitExpr(unaryExpr); @@ -1543,7 +1552,23 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Create all parameters. for (uint32_t i = 0; i < decl->getNumParams(); ++i) { const ParmVarDecl *paramDecl = decl->getParamDecl(i); - (void)declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + QualType paramType = paramDecl->getType(); + auto *param = + declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(paramType)) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(paramType, &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + param, + static_cast(paramDecl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + } +#endif } if (decl->hasBody()) { @@ -1644,6 +1669,15 @@ bool SpirvEmitter::validateVKAttributes(const NamedDecl *decl) { loc); success = false; } + +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(cast(decl)->getType())) { + emitError("vk::push_constant attribute cannot be used on declarations " + "with vk::BufferPointer type", + loc); + success = false; + } +#endif } // vk::shader_record_nv is supported only on cbuffer/ConstantBuffer @@ -1951,6 +1985,11 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { return; } + if (hlsl::IsVKBufferPointerType(decl->getType()) && !decl->hasInit()) { + emitError("vk::BufferPointer has no default constructor", loc); + return; + } + // We can have VarDecls inside cbuffer/tbuffer. For those VarDecls, we need // to emit their cbuffer/tbuffer as a whole and access each individual one // using access chains. @@ -2037,10 +2076,24 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { needsLegalization = true; } - if (var != nullptr && decl->hasAttrs()) { - declIdMapper.decorateWithIntrinsicAttrs(decl, var); - if (auto attr = decl->getAttr()) { - var->setStorageClass(static_cast(attr->getStclass())); + if (var != nullptr) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(decl->getType(), &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + var, + static_cast(decl->hasAttr() + ? spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + + if (decl->hasAttrs()) { + declIdMapper.decorateWithIntrinsicAttrs(decl, var); + if (auto attr = decl->getAttr()) { + var->setStorageClass( + static_cast(attr->getStclass())); + } } } @@ -3665,6 +3718,12 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, } case CastKind::CK_ToVoid: return nullptr; + case CastKind::CK_VK_BufferPointerToIntegral: { + return spvBuilder.createConvertPtrToU(doExpr(subExpr, range), toType); + } + case CastKind::CK_VK_IntegralToBufferPointer: { + return spvBuilder.createConvertUToPtr(doExpr(subExpr, range), toType); + } default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); @@ -5442,6 +5501,8 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, case IntrinsicOp::MOP_WorldRayDirection: case IntrinsicOp::MOP_WorldRayOrigin: return processRayQueryIntrinsics(expr, opcode); + case IntrinsicOp::MOP_GetBufferContents: + return processIntrinsicGetBufferContents(expr); default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -7021,6 +7082,12 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, if (const auto *recordType = valType->getAs()) { assert(recordType->isStructureType()); + if (isTypeInVkNamespace(recordType) && + recordType->getDecl()->getName().equals("BufferPointer")) { + // Uniquely among structs, vk::BufferPointer lowers to a pointer type. 
+ return srcVal; + } + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, spvBuilder); const StructType *spirvStructType = @@ -9403,6 +9470,14 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_EvaluateAttributeSnapped: { retVal = processEvaluateAttributeAt(callExpr, hlslOpcode, srcLoc, srcRange); break; + } + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, false); + break; + } + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, true); + break; } INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); @@ -10782,6 +10857,50 @@ SpirvEmitter::processIntrinsicClamp(const CallExpr *callExpr) { loc, range); } +SpirvInstruction * +SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, + bool isStatic) { + const Expr *argExpr = callExpr->getArg(0); + SpirvInstruction *ptr = doExpr(argExpr); + QualType srcType = argExpr->getType(); + QualType destType = callExpr->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + return srcTypeArg == destTypeArg + ? ptr + : spvBuilder.createUnaryOp(spv::Op::OpBitcast, destType, ptr, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( + const CXXMemberCallExpr *callExpr) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + Expr *obj = callExpr->getImplicitObjectArgument(); + SpirvInstruction *bufferPointer = doExpr(obj); + if (!bufferPointer) + return nullptr; + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); + lowerTypeVisitor.visitInstruction(bufferPointer); + + const SpirvPointerType *bufferPointerType = + dyn_cast(bufferPointer->getResultType()); + SpirvLoad *retVal = + spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, + callExpr->getLocStart()); + if (!align) { + QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + bufferType, retVal->getLayoutRule(), llvm::None, &stride); + } + retVal->setAlignment(align); + retVal->setRValue(false); + return retVal; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index eca038527f..0a5ff308c2 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits @@ -491,6 +495,15 @@ class SpirvEmitter : public ASTConsumer { /// Processes the 'lit' intrinsic function. SpirvInstruction *processIntrinsicLit(const CallExpr *); + /// Processes the 'vk::static_pointer_cast' and 'vk_reinterpret_pointer_cast' + /// intrinsic functions. 
+ SpirvInstruction *processIntrinsicPointerCast(const CallExpr *, + bool isStatic); + + /// Processes the vk::BufferPointer intrinsic function 'Get'. + SpirvInstruction * + processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 21aada9e82..6deb11d946 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. @@ -57,6 +61,8 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUndef) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeConstruct) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeExtract) @@ -620,6 +626,28 @@ bool SpirvConstantNull::operator==(const SpirvConstantNull &that) const { astResultType == that.astResultType; } +SpirvConvertPtrToU::SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertPtrToU, spv::Op::OpConvertPtrToU, type, loc, + range), + ptr(ptr) {} + +bool SpirvConvertPtrToU::operator==(const SpirvConvertPtrToU &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && ptr == that.ptr; +} + +SpirvConvertUToPtr::SpirvConvertUToPtr(SpirvInstruction *val, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertUToPtr, spv::Op::OpConvertUToPtr, type, loc, + range), + val(val) {} + +bool SpirvConvertUToPtr::operator==(const SpirvConvertUToPtr &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && val == that.val; +} + SpirvUndef::SpirvUndef(QualType type) : SpirvInstruction(IK_Undef, spv::Op::OpUndef, type, /*SourceLocation*/ {}) {} diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index 10668dc388..f5a864e2b6 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including @@ -1543,6 +1546,20 @@ TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, if (InitSeq.isConstructorInitialization()) Kind = CK_ConstructorConversion; +#ifdef ENABLE_SPIRV_CODEGEN + // Special cases for vk::BufferPointer. 
+ else if (hlsl::IsVKBufferPointerType(SrcExpr.get()->getType()) && + DestType->isIntegerType() && CCK == Sema::CCK_CStyleCast) { + Kind = CK_VK_BufferPointerToIntegral; + SrcExpr = Result; + return TC_Success; + } else if (hlsl::IsVKBufferPointerType(DestType) && + SrcExpr.get()->getType()->isIntegerType()) { + Kind = CK_VK_IntegralToBufferPointer; + SrcExpr = Result; + return TC_Success; + } +#endif else Kind = CK_NoOp; diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index f46bb0ad9f..4723bc93e9 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// /// /// \file @@ -1052,6 +1055,31 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, // corresponding cast expression. if (Exprs.size() == 1 && !ListInitialization) { Expr *Arg = Exprs[0]; +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { + for (auto *ctor : Ty->getAsCXXRecordDecl()->ctors()) { + if (auto *functionType = ctor->getType()->getAs()) { + if (functionType->getNumParams() != 1 || + !functionType->getParamType(0)->isIntegerType()) + continue; + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); + } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, ctor, false, {Arg}, false, false, false, + false, CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); + } + } + } +#endif return BuildCXXFunctionalCastExpr(TInfo, LParenLoc, Arg, RParenLoc); } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index d20daa0ac0..f001cb70d9 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the semantic support for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -195,6 +198,7 @@ enum ArBasicKind { AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -550,6 +554,7 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT, // AR_OBJECT_VK_LITERAL, BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE use recordType BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID use recordType + BPROP_OBJECT, // AR_OBJECT_VK_BUFFER_POINTER use recordType #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1232,6 +1237,9 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, + AR_BASIC_UNKNOWN}; + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. 
const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1287,6 +1295,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1345,6 +1354,7 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_VK_SPIRV_TYPE, AR_OBJECT_VK_SPIRV_OPAQUE_TYPE, AR_OBJECT_VK_INTEGRAL_CONSTANT, AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1451,6 +1461,7 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_VK_LITERAL, 1, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE 1, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + 2, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1599,6 +1610,7 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_LITERAL, {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1763,6 +1775,7 @@ static const char *g_ArBasicTypeNames[] = { "Literal", "ext_type", "ext_result_id", + "BufferPointer", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -2981,6 +2994,7 @@ class HLSLExternalSource : public ExternalSemaSource { ClassTemplateDecl *m_vkIntegralConstantTemplateDecl; ClassTemplateDecl *m_vkLiteralTemplateDecl; + ClassTemplateDecl *m_vkBufferPointerTemplateDecl; // Declarations for Work Graph Output Record types ClassTemplateDecl *m_GroupNodeOutputRecordsTemplateDecl; @@ -3486,6 +3500,25 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.push_back(templateTypeParmDecl); continue; } + if (pArgs[i].uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (TInfo == nullptr) { + TInfo = m_sema->getASTContext().CreateTypeSourceInfo( + m_context->UnsignedIntTy, 0); + } + IdentifierInfo *idT = &context.Idents.get("T"); + IdentifierInfo *idA = &context.Idents.get("A"); + TemplateTypeParmDecl *templateTypeParmDecl = + TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, + 0, idT, TypenameTrue, + ParameterPackFalse); + NonTypeTemplateParmDecl *nonTypeTemplateParmDecl = + NonTypeTemplateParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, + 0, 1, idA, context.UnsignedIntTy, + ParameterPackFalse, TInfo); + templateTypeParmDecl->setDefaultArgument(TInfo); + templateTypeParmDecls.push_back(templateTypeParmDecl); + templateTypeParmDecls.push_back(nonTypeTemplateParmDecl); + } } return templateTypeParmDecls; } @@ -3554,6 +3587,19 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_HIT_OBJECT: paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); break; + case LICOMPTYPE_VK_BUFFER_POINTER: { + const ArBasicKind *match = + std::find(g_ArBasicKindsAsTypes, + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + AR_OBJECT_VK_BUFFER_POINTER); + DXASSERT(match != + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + "otherwise can't find constant in basic kinds"); + size_t index = match - g_ArBasicKindsAsTypes; + paramTypes.push_back( + 
m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); + break; + } default: DXASSERT(false, "Argument type of intrinsic function is not " "supported"); @@ -3932,6 +3978,12 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareTemplateTypeWithHandleInDeclContext( *m_context, m_vkNSDecl, typeName, 1, nullptr); recordDecl->setImplicit(true); + } else if (kind == AR_OBJECT_VK_BUFFER_POINTER) { + if (!m_vkNSDecl) + continue; + recordDecl = DeclareVkBufferPointerType(*m_context, m_vkNSDecl); + recordDecl->setImplicit(true); + m_vkBufferPointerTemplateDecl = recordDecl->getDescribedClassTemplate(); } #endif else if (templateArgCount == 0) { @@ -4044,7 +4096,8 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLExternalSource() : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), - m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), + m_vkLiteralTemplateDecl(nullptr), + m_vkBufferPointerTemplateDecl(nullptr), m_hlslNSDecl(nullptr), m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); @@ -4802,7 +4855,8 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_NODE_OUTPUT_ARRAY: case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: - case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: { + case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: + case AR_OBJECT_VK_BUFFER_POINTER: { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -5318,6 +5372,8 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; + } else if (hlsl::IsVKBufferPointerType(qt)) { + return true; } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -6790,6 +6846,7 @@ bool HLSLExternalSource::MatchArguments( if (pIntrinsic->pArgs[0].qwUsage && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION_2 && pIntrinsic->pArgs[0].uComponentTypeId != INTRIN_COMPTYPE_FROM_NODEOUTPUT) { CAB(pIntrinsic->pArgs[0].uTemplateId < MaxIntrinsicArgs, 0); @@ -6830,7 +6887,8 @@ bool HLSLExternalSource::MatchArguments( // Check template. if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE || - pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION) { + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION || + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { continue; // Already verified that this is available. 
} if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { @@ -6999,6 +7057,14 @@ bool HLSLExternalSource::MatchArguments( } else { pNewType = functionTemplateTypeArg; } + } else if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (i == 0 && + (builtinOp == hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast || + builtinOp == hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast)) { + pNewType = Args[0]->getType(); + } else { + badArgIdx = std::min(badArgIdx, i); + } } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { if (objectElement.isNull()) { @@ -9685,6 +9751,11 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } + // Cast vk::BufferPointer to pointer address. + if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { + return TargetInfo.EltKind == AR_BASIC_UINT64; + } + // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || SourceInfo.EltKind == AR_OBJECT_TEXTURE_BUFFER) && @@ -11533,6 +11604,30 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, + bool isStatic) { + const Expr *argExpr = CE->getArg(0); + QualType srcType = argExpr->getType(); + QualType destType = CE->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + + if (isStatic && srcTypeArg != destTypeArg && + !S.IsDerivedFrom(srcTypeArg, destTypeArg)) { + S.Diags.Report(CE->getExprLoc(), + diag::err_hlsl_vk_static_pointer_cast_type); + return true; + } + + if (hlsl::GetVKBufferPointerAlignment(destType) > + hlsl::GetVKBufferPointerAlignment(srcType)) { + S.Diags.Report(CE->getExprLoc(), diag::err_hlsl_vk_pointer_cast_alignment); + return true; + } + + return false; +} + // Check HLSL call constraints, not fatal to creating the AST. 
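Before the call-constraint entry point below, a concrete illustration of what the new CheckVKBufferPointerCast enforces. This is a small HLSL sketch, not part of the patch; the struct names, alignments, and explicit template arguments are assumptions modeled on the vk::BufferPointer tests later in this series:

  struct Base    { float4 a; };
  struct Derived : Base { float4 b; };

  typedef vk::BufferPointer<Derived, 16> DerivedPtr;
  typedef vk::BufferPointer<Base, 16>    BasePtr;

  void casts(DerivedPtr dp) {
    // Accepted: the source buffer type (Derived) derives from the destination
    // buffer type (Base), and the destination alignment (16) does not exceed
    // the source alignment (16).
    BasePtr bp = vk::static_pointer_cast<Base, 16>(dp);

    // Rejected by the alignment check: the destination claims a stricter
    // alignment (32) than the source guarantees (16).
    // vk::BufferPointer<Base, 32> bad = vk::static_pointer_cast<Base, 32>(dp);
  }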
void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto) { @@ -11551,6 +11646,12 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP_Barrier: CheckBarrierCall(*this, FDecl, TheCall); break; + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, false); + break; + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, true); + break; default: break; } @@ -13801,6 +13902,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; // SPIRV Change Starts + case AttributeList::AT_VKAliasedPointer: { + declAttr = ::new (S.Context) VKAliasedPointerAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + } break; case AttributeList::AT_VKDecorateIdExt: { if (A.getNumArgs() == 0 || !A.getArg(0).is()) { Handled = false; diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl new file mode 100644 index 0000000000..f0f5c54a16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -E main -T cs_6_7 %s | FileCheck %s + +// Bug was causing alignment miss + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer rwbuf; + +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0].Get()); +} + +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 +// CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 +// CHECK: OpStore [[L2]] %int_1 Aligned 4 + + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl new file mode 100644 index 0000000000..fc5b9edad0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +cbuffer cbuf { + [[vk::aliased_pointer]] Globals_p bp; +} + +// CHECK: OpDecorate [[BP0:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP1:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[V4C:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK: [[GS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GS]] +// CHECK: [[TT:%[_0-9A-Za-z]*]] = 
OpTypeStruct [[PGS]] +// CHECK: [[PTT:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[TT]] +// CHECK: [[PFV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[V4FLOAT]] +// CHECK: [[PPGS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGS]] +// CHECK: [[PBV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] + +void f([[vk::aliased_pointer]] Globals_p bp) { +} + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + [[vk::aliased_pointer]] Globals_p bp0 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + [[vk::aliased_pointer]] Globals_p bp1 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + bp0.Get().g_vTestFloat4 = vTest; + f(bp0); + return bp1.Get().g_vTestFloat4; // Returns float4(1.0,0.0,0.0,0.0) +} + +// CHECK: [[GP:%[_0-9A-Za-z]*]] = OpVariable [[PTT]] PushConstant +// CHECK: [[VTEST:%[0-9A-Za-z]*]] = OpVariable [[PFV4FLOAT]] Function +// CHECK: OpStore [[VTEST]] [[V4C]] +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad %_ptr_PhysicalStorageBuffer_Globals_s [[X1]] +// CHECK: OpStore [[BP0]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] +// CHECK: OpStore [[BP1]] [[X4]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] +// CHECK: OpStore [[X7]] [[X5]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 +// CHECK: OpReturnValue [[X10]] + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl new file mode 100644 index 0000000000..992d8b39fd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -spirv -fcgl -T ps_6_0 %s | FileCheck %s + +struct S { + uint u; +}; + +typedef vk::BufferPointer BP; + +struct PC { + BP bp; +}; + +[[vk::push_constant]] PC pc; + +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[S:%[_0-9A-Za-z]*]] = OpTypeStruct [[UINT]] +// CHECK: [[PS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[S]] +// CHECK: [[PU:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} PushConstant + +void main() +{ +// CHECK: [[IN:%[_0-9A-Za-z]*]] = OpVariable +// CHECK: [[OUT:%[_0-9A-Za-z]*]] = OpVariable + uint u0, u1; + +// CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] +// CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] +// CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] +// CHECK: OpStore [[OUT]] [[X5]] + InterlockedExchange(pc.bp.Get().u, u0, u1); +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl new file mode 100644 index 
0000000000..86cf48c41e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + float a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + float tmp = buffer.Get().a; + buffer.Get().a = tmp; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl new file mode 100644 index 0000000000..09585a7664 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Globals_s { + float4 a; +}; + +typedef vk::BufferPointer Globals_p; +typedef vk::BufferPointer Globals_pp; + +[[vk::push_constant]] +Globals_pp bda; + +[numthreads(1, 1, 1)] +void main() { + float4 r = bda.Get().Get().a; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl new file mode 100644 index 0000000000..e803b5b754 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + uint data = buffer.Get(); + buffer.Get() = data; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl new file mode 100644 index 0000000000..1029aa7f2e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl @@ -0,0 +1,18 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + buffer.Get() = 1; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl new file mode 100644 index 0000000000..62bdb7f3cb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl @@ -0,0 +1,26 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +//[[vk::push_constant]] +//BufferContent buffer; + +RWStructuredBuffer rwbuf; + +// Wrong type in the parameter. 
+void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0]); +} + +// CHECK: no matching function for call to 'foo' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl new file mode 100644 index 0000000000..a89b286edf --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl @@ -0,0 +1,23 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer buf; + +void foo(const BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + static BufferContent bcs = buf[0]; + static BufferBuffer bbs = (BufferContent)bcs; +} + +// CHECK: cannot initialize a variable of type 'BufferPointer' with an lvalue of type 'BufferPointer' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl new file mode 100644 index 0000000000..71fee1a795 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpCapability PhysicalStorageBufferAddresses +// CHECK: OpExtension "SPV_KHR_physical_storage_buffer" +// CHECK: OpMemoryModel PhysicalStorageBuffer64 GLSL450 +// CHECK: OpEntryPoint Fragment [[MAIN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +// Forward declaration +typedef struct block_s block_t; +typedef vk::BufferPointer block_p; + +struct block_s +{ + float4 x; + block_p next; +}; + +struct TestPushConstant_t +{ + block_p root; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: OpDecorate [[GP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[COPY1:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpDecorate [[COPY2:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpMemberDecorate [[BLOCK:%[_0-9A-Za-z]*]] 1 Offset 16 +// CHECK: OpTypeForwardPointer [[PBLOCK:%[_0-9A-Za-z]*]] PhysicalStorageBuffer +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[ULONG:%[_0-9A-Za-z]*]] = OpTypeInt 64 0 +// CHECK: [[UL0:%[_0-9A-Za-z]*]] = OpConstant [[ULONG]] 0 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F0]] [[F0]] [[F0]] [[F0]] +// CHECK: [[BLOCK]] = OpTypeStruct [[V4FLOAT]] [[PBLOCK]] +// CHECK: [[PBLOCK]] = OpTypePointer PhysicalStorageBuffer [[BLOCK]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PBLOCK]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPBLOCK0:%[_0-9A-Za-z]*]] = OpTypePointer Function %_ptr_PhysicalStorageBuffer_block_s +// CHECK: [[PPBLOCK1:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PBLOCK]] +// CHECK: [[PPBLOCK2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[PBLOCK]] +// CHECK: [[BOOL:%[_0-9A-Za-z]*]] = OpTypeBool +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK: [[OUT]] 
= OpVariable [[PV4FLOAT1]] Output + +[numthreads(1,1,1)] +float4 MainPs(void) : SV_Target0 +{ + if (__has_feature(hlsl_vk_buffer_pointer)) { + [[vk::aliased_pointer]] block_p g_p = + vk::static_pointer_cast(g_PushConstants.root); + g_p = g_p.Get().next; + uint64_t addr = (uint64_t)g_p; + block_p copy1 = block_p(addr); + block_p copy2 = block_p(copy1); + if (addr == 0) // Null pointer test + return float4(0.0,0.0,0.0,0.0); + return g_p.Get().x; + } + return float4(0.0,0.0,0.0,0.0); +} + +// CHECK: [[MAIN]] = OpFunction +// CHECK-NEXT: OpLabel +// CHECK-NEXT: [[RESULT:%[_0-9A-Za-z]*]] = OpFunctionCall [[V4FLOAT]] [[FUN:%[_0-9A-Za-z]*]] +// CHECK: OpStore [[OUT]] [[RESULT]] +// CHECK: OpFunctionEnd +// CHECK: [[FUN]] = OpFunction [[V4FLOAT]] +// CHECK: [[GP]] = OpVariable [[PPBLOCK0]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] +// CHECK: OpStore [[GP]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: OpStore [[GP]] [[X5]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] +// CHECK: OpStore [[ADDR:%[_0-9A-Za-z]*]] [[X7]] +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PBLOCK]] [[X8]] +// CHECK: OpStore [[COPY1]] [[X9]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[COPY1]] +// CHECK: OpStore [[COPY2]] [[X10]] +// CHECK: [[X11:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X12:%[_0-9A-Za-z]*]] = OpIEqual %bool [[X11]] [[UL0]] +// CHECK: OpBranchConditional [[X12]] [[IF_TRUE:%[_0-9A-Za-z]*]] [[IF_MERGE:%[_0-9A-Za-z]*]] +// CHECK: [[IF_TRUE]] = OpLabel +// CHECK: OpReturnValue [[CV4FLOAT]] +// CHECK: [[IF_MERGE]] = OpLabel +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] +// CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 +// CHECK: OpReturnValue [[X15]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl new file mode 100644 index 0000000000..c7d6f0ed2b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = 
OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 +// CHECK: OpStore [[OUT]] [[X4]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl new file mode 100644 index 0000000000..b2efd02cbd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK-DAG: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: OpStore [[OUT]] [[CV4FLOAT]] +// CHECK: OpFunctionEnd diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 0ca5b0716b..55c3643d95 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ 
-1,6 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// // See hctdb.py for the implementation of intrinsic file processing. // // Intrinsic declarations are grouped into namespaces that @@ -393,7 +396,13 @@ void [[]] RawBufferStore(in u64 addr, in $funcT value); void [[]] RawBufferStore(in u64 addr, in $funcT value, in uint alignment); void [[]] ext_execution_mode(in uint mode, ...); void [[]] ext_execution_mode_id(in uint mode, ...); +$funcT2 [[]] static_pointer_cast(in VkBufferPointer ptr); +$funcT2 [[]] reinterpret_pointer_cast(in VkBufferPointer ptr); + +} namespace +namespace BufferPointerMethods { +$classT [[ro]] GetBufferContents(); } namespace // SPIRV Change Ends @@ -1147,4 +1156,3 @@ $classT [[]] SubpassLoad(in int sample) : subpassinputms_load; } namespace // SPIRV Change Ends - diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 05bc7d472d..5eb35fb52a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,5 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. +# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +# All rights reserved. ############################################################################### # DXIL information. # ############################################################################### @@ -8584,6 +8586,7 @@ def __init__(self, intrinsic_defs, opcode_data): "GroupNodeOutputRecords": "LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS", "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", "DxHitObject": "LICOMPTYPE_HIT_OBJECT", + "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} @@ -8645,7 +8648,8 @@ def load_intrinsics(self, intrinsic_defs): (?:RW)?(?:Texture\w*|ByteAddressBuffer) | acceleration_struct | ray_desc | RayQuery | DxHitObject | Node\w* | RWNode\w* | EmptyNode\w* | - AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* + AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* | + VkBufferPointer $)""", flags=re.VERBOSE, ) @@ -8697,6 +8701,10 @@ def process_arg(desc, idx, done_args, intrinsic_name): template_id = "-3" component_id = "0" type_name = "void" + elif type_name == "$funcT2": + template_id = "-4" + component_id = "0" + type_name = "void" elif type_name == "...": assert idx != 0, "'...' 
can only be used in the parameter list" template_id = "-2" @@ -8825,6 +8833,8 @@ def do_object(m): template_id = "INTRIN_TEMPLATE_VARARGS" elif template_id == "-3": template_id = "INTRIN_TEMPLATE_FROM_FUNCTION" + elif template_id == "-4": + template_id = "INTRIN_TEMPLATE_FROM_FUNCTION_2" if component_id == "-1": component_id = "INTRIN_COMPTYPE_FROM_TYPE_ELT0" if component_id == "-2": diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index 4c85069488..c4527277cd 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 360, + "Num_Intrinsics": 363, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -360,6 +360,9 @@ "MOP_InterlockedUMax": 356, "MOP_InterlockedUMin": 357, "MOP_DxHitObject_MakeNop": 358, - "IOP_DxMaybeReorderThread": 359 + "IOP_DxMaybeReorderThread": 359, + "IOP_Vkreinterpret_pointer_cast": 360, + "IOP_Vkstatic_pointer_cast": 361, + "MOP_GetBufferContents": 362 } } From 2b1c2e640dae09adf1cb2dd52bc5ce860d73b02b Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:09:22 -0700 Subject: [PATCH 63/88] Fix typo in exec tests comment (#7299) Keep seeing this comment typo and wanted to rectify. --- .../unittests/HLSLExec/ExecutionTest.cpp | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 91b42f6b79..6db27d7a41 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -5632,7 +5632,7 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test is creating the resource to run + // this callback is called when the test is creating the resource to run // the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(Name); @@ -6999,7 +6999,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7067,7 +7067,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7157,7 +7157,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7234,7 +7234,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is 
called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7314,7 +7314,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7424,7 +7424,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7494,7 +7494,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -7554,7 +7554,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -7619,7 +7619,7 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -7707,7 +7707,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -7777,7 +7777,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -7869,7 +7869,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -7948,7 +7948,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -8016,7 +8016,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -8091,7 +8091,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -8187,7 +8187,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -8264,7 +8264,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -8363,7 +8363,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -8948,7 +8948,7 @@ TEST_F(ExecutionTest, DotTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "DotOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SDotOp")); @@ -9240,7 +9240,7 @@ TEST_F(ExecutionTest, Msad4Test) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "Msad4", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SMsad4")); @@ -9342,7 +9342,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -9455,7 +9455,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -9883,7 +9883,7 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( ++maskIndex) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "WaveIntrinsicsOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); @@ -12609,7 +12609,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "HelperLaneTestNoWave", - // this callbacked is called when the test is creating the resource to + // this callback is called when the test is creating the resource to // run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); From 3b1a29bf89520c0159669487feaaac5a98ab8ed5 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 2 Apr 2025 16:19:43 -0700 Subject: [PATCH 64/88] [OMM] Add DXR Entry point test, non-library target test, conforming tests to spec. (#7281) This PR adds 2 tests that were mentioned in the spec that haven't yet been added. 1. 
A test that makes sure that restricted flags are diagnosed in DXR entry shaders. 2. A test that makes sure that no diagnostics are emitted when a restricted flag is used for a subobject in a non-library shader target. Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7282 --- .../SemaHLSL/rayquery-omm-DXR-entry-point.hlsl | 17 +++++++++++++++++ .../test/SemaHLSL/rayquery-omm-type-diag.hlsl | 4 ++-- .../raytracingpipelineconfig1-no-errors.hlsl | 12 ++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl create mode 100644 tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl new file mode 100644 index 0000000000..722187cf43 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_3 -validator-version 1.8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +RaytracingAccelerationStructure RTAS; +// DXR entry to test that restricted flags are diagnosed. +[shader("raygeneration")] +void main(void) { + RayDesc rayDesc; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} + RayQuery rayQuery; + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl index 981788a688..5e484d193e 100644 --- a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -1,5 +1,5 @@ -// RUN: %dxc -T vs_6_9 -E RayQueryTests -verify %s -// RUN: %dxc -T vs_6_5 -E RayQueryTests2 -verify %s +// RUN: %dxc -T vs_6_9 -verify %s +// RUN: %dxc -T vs_6_5 -verify %s // validate 2nd template argument flags // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl new file mode 100644 index 0000000000..272a46a87e --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T ps_6_0 -verify %s + +// expected-no-diagnostics +// No diagnostic is expected because this is a non-library target, +// and SubObjects are ignored on non-library targets.
+ +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +[shader("pixel")] +int main(int i : INDEX) : SV_Target { + return 1; +} From 65564102a78a99b191228cc88ef4ccee2f987783 Mon Sep 17 00:00:00 2001 From: Cassandra Beckley Date: Wed, 2 Apr 2025 18:38:02 -0700 Subject: [PATCH 65/88] [SPIR-V] Implement QuadAny and QuadAll (#7266) If `"SPV_KHR_quad_control"` can be used, uses `OpGroupNonUniformQuadAnyKHR` and `OpGroupNonUniformQuadAllKHR`. If not, falls back to constructing the value using `OpGroupNonUniformQuadSwap`. Fixes #7247 --- docs/SPIR-V.rst | 8 +++ .../include/clang/SPIRV/FeatureManager.h | 1 + .../clang/include/clang/SPIRV/SpirvBuilder.h | 2 +- .../include/clang/SPIRV/SpirvInstruction.h | 8 +-- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 3 ++ tools/clang/lib/SPIRV/EmitVisitor.cpp | 7 +-- tools/clang/lib/SPIRV/FeatureManager.cpp | 3 ++ tools/clang/lib/SPIRV/SpirvBuilder.cpp | 2 +- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 51 +++++++++++++++++++ tools/clang/lib/SPIRV/SpirvEmitter.h | 4 ++ tools/clang/lib/SPIRV/SpirvInstruction.cpp | 9 +++- .../test/CodeGenSPIRV/sm6.quad-any-all.hlsl | 41 +++++++++++++++ 12 files changed, 130 insertions(+), 9 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 899b587492..b5e9c05079 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -320,6 +320,7 @@ Supported extensions * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls * SPV_NV_shader_subgroup_partitioned +* SPV_KHR_quad_control Vulkan specific attributes -------------------------- @@ -4008,6 +4009,8 @@ Quad ``QuadReadAcrossX()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossY()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadLaneAt()`` ``OpGroupNonUniformQuadBroadcast`` +Quad ``QuadAny()`` ``OpGroupNonUniformQuadAnyKHR`` +Quad ``QuadAll()`` ``OpGroupNonUniformQuadAllKHR`` N/A ``WaveMatch()`` ``OpGroupNonUniformPartitionNV`` Multiprefix ``WaveMultiPrefixSum()`` ``OpGroupNonUniform*Add`` ``PartitionedExclusiveScanNV`` Multiprefix ``WaveMultiPrefixProduct()`` ``OpGroupNonUniform*Mul`` ``PartitionedExclusiveScanNV`` @@ -4016,6 +4019,11 @@ Multiprefix ``WaveMultiPrefixBitOr()`` ``OpGroupNonUniformLogicalOr`` ` Multiprefix ``WaveMultiPrefixBitXor()`` ``OpGroupNonUniformLogicalXor`` ``PartitionedExclusiveScanNV`` ============= ============================ =================================== ============================== +``QuadAny`` and ``QuadAll`` will use the ``OpGroupNonUniformQuadAnyKHR`` and +``OpGroupNonUniformQuadAllKHR`` instructions if the ``SPV_KHR_quad_control`` +extension is enabled. If it is not, they will fall back to constructing the +value using multiple calls to ``OpGroupNonUniformQuadBroadcast``. 
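For illustration, a minimal HLSL usage sketch (not taken from this patch; the buffer name, thread-group size, and output values are assumptions made for the example):

  RWStructuredBuffer<float> Output;

  [numthreads(8, 8, 1)]
  void main(uint3 id : SV_DispatchThreadID) {
    // Each lane evaluates its own condition; the quad intrinsics then
    // reduce that condition across the 2x2 quad the lane belongs to.
    bool laneCond = (id.x % 2) == 0;
    bool anyInQuad = QuadAny(laneCond); // true if any of the four lanes passed
    bool allInQuad = QuadAll(laneCond); // true only if all four lanes passed
    Output[id.y * 8 + id.x] = allInQuad ? 2.0 : (anyInQuad ? 1.0 : 0.0);
  }

When compiled for SPIR-V with SPV_KHR_quad_control enabled, the two calls are expected to lower to the new opcodes listed above; without the extension, the OpGroupNonUniformQuadSwap-based fallback in the emitter is used instead.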
+ The Implicit ``vk`` Namespace ============================= diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 8a9755ae79..3c1871df37 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -64,6 +64,7 @@ enum class Extension { KHR_maximal_reconvergence, KHR_float_controls, NV_shader_subgroup_partitioned, + KHR_quad_control, Unknown, }; diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index ed2cb3b6fd..5e03d1ef96 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -242,7 +242,7 @@ class SpirvBuilder { /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V /// opcode. SpirvGroupNonUniformOp *createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation, llvm::Optional groupOp = llvm::None); diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7a7ad3aa4d..f49a295610 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -1566,7 +1566,8 @@ class SpirvFunctionCall : public SpirvInstruction { /// \brief OpGroupNonUniform* instructions class SpirvGroupNonUniformOp : public SpirvInstruction { public: - SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, spv::Scope scope, + SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, + llvm::Optional scope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional group); @@ -1580,7 +1581,8 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { bool invokeVisitor(Visitor *v) override; - spv::Scope getExecutionScope() const { return execScope; } + bool hasExecutionScope() const { return execScope.hasValue(); } + spv::Scope getExecutionScope() const { return execScope.getValue(); } llvm::ArrayRef getOperands() const { return operands; } @@ -1598,7 +1600,7 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { } private: - spv::Scope execScope; + llvm::Optional execScope; llvm::SmallVector operands; llvm::Optional groupOp; }; diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 6fd0c6d950..24dfdc2e9a 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -887,6 +887,9 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { addCapability(spv::Capability::InterpolationFunction); + addExtensionAndCapabilitiesIfEnabled(Extension::KHR_quad_control, + {spv::Capability::QuadControlKHR}); + return true; } diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 9c0368f7a1..eb00f59632 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -1134,9 +1134,10 @@ bool EmitVisitor::visit(SpirvGroupNonUniformOp *inst) { initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, static_cast(inst->getExecutionScope())), - context.getUIntType(32), /* isSpecConst */ false)); + if (inst->hasExecutionScope()) + curInst.push_back(typeHandler.getOrCreateConstantInt( + llvm::APInt(32, 
static_cast(inst->getExecutionScope())), + context.getUIntType(32), /* isSpecConst */ false)); if (inst->hasGroupOp()) curInst.push_back(static_cast(inst->getGroupOp())); for (auto *operand : inst->getOperands()) diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index a8ee1de000..7fb449fee9 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -226,6 +226,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_float_controls", Extension::KHR_float_controls) .Case("SPV_NV_shader_subgroup_partitioned", Extension::NV_shader_subgroup_partitioned) + .Case("SPV_KHR_quad_control", Extension::KHR_quad_control) .Default(Extension::Unknown); } @@ -297,6 +298,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_float_controls"; case Extension::NV_shader_subgroup_partitioned: return "SPV_NV_shader_subgroup_partitioned"; + case Extension::KHR_quad_control: + return "SPV_KHR_quad_control"; default: break; } diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 6b3f43fc77..689fc0715f 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -453,7 +453,7 @@ SpirvSpecConstantBinaryOp *SpirvBuilder::createSpecConstantBinaryOp( } SpirvGroupNonUniformOp *SpirvBuilder::createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional groupOp) { assert(insertPoint && "null insert point"); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 7cc84fa2fc..eed4f6369f 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -9271,6 +9271,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_QuadReadLaneAt: retVal = processWaveQuadWideShuffle(callExpr, hlslOpcode); break; + case hlsl::IntrinsicOp::IOP_QuadAny: + case hlsl::IntrinsicOp::IOP_QuadAll: + retVal = processWaveQuadAnyAll(callExpr, hlslOpcode); + break; case hlsl::IntrinsicOp::IOP_abort: case hlsl::IntrinsicOp::IOP_GetRenderTargetSampleCount: case hlsl::IntrinsicOp::IOP_GetRenderTargetSamplePosition: { @@ -10233,6 +10237,53 @@ SpirvEmitter::processWaveQuadWideShuffle(const CallExpr *callExpr, opcode, retType, spv::Scope::Subgroup, {value, target}, srcLoc); } +SpirvInstruction *SpirvEmitter::processWaveQuadAnyAll(const CallExpr *callExpr, + hlsl::IntrinsicOp op) { + // Signatures: + // bool QuadAny(bool localValue) + // bool QuadAll(bool localValue) + assert(callExpr->getNumArgs() == 1); + assert(op == hlsl::IntrinsicOp::IOP_QuadAny || + op == hlsl::IntrinsicOp::IOP_QuadAll); + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation", + callExpr->getExprLoc()); + + auto *predicate = doExpr(callExpr->getArg(0)); + const auto srcLoc = callExpr->getExprLoc(); + + if (!featureManager.isExtensionEnabled(Extension::KHR_quad_control)) { + // We can't use QuadAny/QuadAll, so implement them using QuadSwap. We + // will read the value at each quad invocation, then combine them. + + spv::Op reducer = op == hlsl::IntrinsicOp::IOP_QuadAny + ? 
spv::Op::OpLogicalOr + : spv::Op::OpLogicalAnd; + + SpirvInstruction *result = predicate; + + for (size_t i = 0; i < 3; i++) { + SpirvInstruction *invocationValue = spvBuilder.createGroupNonUniformOp( + spv::Op::OpGroupNonUniformQuadSwap, astContext.BoolTy, + spv::Scope::Subgroup, + {predicate, spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, i))}, + srcLoc); + result = spvBuilder.createBinaryOp(reducer, astContext.BoolTy, result, + invocationValue, srcLoc); + } + + return result; + } + + spv::Op opcode = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpGroupNonUniformQuadAnyKHR + : spv::Op::OpGroupNonUniformQuadAllKHR; + + return spvBuilder.createGroupNonUniformOp(opcode, astContext.BoolTy, + llvm::Optional(), + {predicate}, srcLoc); +} + SpirvInstruction * SpirvEmitter::processWaveActiveAllEqual(const CallExpr *callExpr) { assert(callExpr->getNumArgs() == 1); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 0a5ff308c2..79d2c43c35 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -670,6 +670,10 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op); + /// Processes SM6.7 quad any/all. + SpirvInstruction *processWaveQuadAnyAll(const CallExpr *, + hlsl::IntrinsicOp op); + /// Generates the Spir-V instructions needed to implement the given call to /// WaveActiveAllEqual. Returns a pointer to the instruction that produces the /// final result. diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 6deb11d946..f41de03adc 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -705,7 +705,7 @@ SpirvFunctionCall::SpirvFunctionCall(QualType resultType, SourceLocation loc, function(fn), args(argsVec.begin(), argsVec.end()) {} SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope scope, + spv::Op op, QualType resultType, llvm::Optional scope, llvm::ArrayRef operandsVec, SourceLocation loc, llvm::Optional group) : SpirvInstruction(IK_GroupNonUniformOp, op, resultType, loc), @@ -737,6 +737,8 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( case spv::Op::OpGroupNonUniformLogicalAnd: case spv::Op::OpGroupNonUniformLogicalOr: case spv::Op::OpGroupNonUniformLogicalXor: + case spv::Op::OpGroupNonUniformQuadAnyKHR: + case spv::Op::OpGroupNonUniformQuadAllKHR: assert(operandsVec.size() == 1); break; @@ -768,6 +770,11 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( assert(false && "Unexpected Group non-uniform opcode"); break; } + + if (op != spv::Op::OpGroupNonUniformQuadAnyKHR && + op != spv::Op::OpGroupNonUniformQuadAllKHR) { + assert(scope.hasValue()); + } } SpirvImageOp::SpirvImageOp( diff --git a/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl new file mode 100644 index 0000000000..fb9f6e0d76 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fspv-extension=SPV_KHR_16bit_storage -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,NOQUAD +// RUN: not %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.0 -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefixes=ERROR + +// CHECK: ; Version: 1.3 + +// QUAD: 
OpCapability QuadControlKHR
+// QUAD: OpExtension "SPV_KHR_quad_control"
+
+RWStructuredBuffer values;
+
+[numthreads(32, 1, 1)]
+void main(uint3 id: SV_DispatchThreadID) {
+  uint outIdx = (id.y * 8) + id.x;
+
+// CHECK: [[val1:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}}
+// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAnyKHR %bool [[val1]]
+
+// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_0
+// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalOr %bool [[val1]] [[inv0]]
+// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_1
+// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalOr %bool [[or0]] [[inv1]]
+// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_2
+// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalOr %bool [[or1]] [[inv2]]
+
+// ERROR: 27:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use
+  values[outIdx].x = QuadAny(outIdx % 4 == 0) ? 1.0 : 2.0;
+
+// CHECK: [[val2:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}}
+// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAllKHR %bool [[val2]]
+
+// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_0
+// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalAnd %bool [[val2]] [[inv0]]
+// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_1
+// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalAnd %bool [[or0]] [[inv1]]
+// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_2
+// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalAnd %bool [[or1]] [[inv2]]
+
+// ERROR: 40:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use
+  values[outIdx].y = QuadAll(outIdx % 2 == 0) ? 3.0 : 4.0;
+}

From 90102440f822dde23d1ee1e6b2970db2aaf1f849 Mon Sep 17 00:00:00 2001
From: Urs Hanselmann <6864721+urshanselmann@users.noreply.github.com>
Date: Thu, 3 Apr 2025 15:55:41 +0200
Subject: [PATCH 66/88] Add UUID compiler extension check on Clang (#7286)

Fixes #7248

Fix Clang Compilation on Linux without Microsoft extensions enabled.

## Rationale

Clang support depends on the `-fms-extensions` compiler flag.
[[1]](https://clang.llvm.org/docs/UsersManual.html#microsoft-extensions)
If enabled, the `_MSC_EXTENSIONS` macro is defined.
[[2]](https://github.com/llvm/llvm-project/blob/19a319667b567a26a20f9829a0ae7e6a5c259cba/clang/lib/Basic/Targets/OSTargets.cpp#L248)
---
 include/dxc/WinAdapter.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/dxc/WinAdapter.h b/include/dxc/WinAdapter.h
index b8c6646871..d02ad1ac38 100644
--- a/include/dxc/WinAdapter.h
+++ b/include/dxc/WinAdapter.h
@@ -51,7 +51,8 @@
 #define _countof(a) (sizeof(a) / sizeof(*(a)))

 // If it is GCC, there is no UUID support and we must emulate it.
-#ifndef __clang__
+// Clang support depends on the -fms-extensions compiler flag.
+#if !defined(__clang__) || !defined(_MSC_EXTENSIONS)
 #define __EMULATE_UUID 1
 #endif // __clang__

From 6a73640b91f823c4b9d9cc2c89eb2d3d93b0377f Mon Sep 17 00:00:00 2001
From: Chris B
Date: Thu, 3 Apr 2025 08:56:07 -0500
Subject: [PATCH 67/88] Update DXC's CONTRIBUTING file (#7265)

This change seeks to address some recent questions about how the LLVM
Coding Standards are applied in DXC.
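As a side note on the WinAdapter.h change in #7286 above: the following is a minimal, self-contained sketch (not taken from any patch in this series) of how that preprocessor gate behaves. It assumes, per the rationale quoted in that patch, that clang defines `_MSC_EXTENSIONS` only when `-fms-extensions` is enabled; the `main` function and its messages are invented purely for illustration.

```cpp
// Hypothetical illustration of the WinAdapter.h gate from #7286; not part of
// the patch itself. With GCC, or with a clang build that does not enable
// Microsoft extensions, _MSC_EXTENSIONS is absent and UUID emulation is used.
#include <cstdio>

#if !defined(__clang__) || !defined(_MSC_EXTENSIONS)
#define __EMULATE_UUID 1
#endif

int main() {
#ifdef __EMULATE_UUID
  std::puts("__EMULATE_UUID is defined: UUIDs are emulated");
#else
  std::puts("__EMULATE_UUID is not defined: native __uuidof support is assumed");
#endif
  return 0;
}
```

Compiling the sketch with and without `-fms-extensions` on a toolchain where that flag defines `_MSC_EXTENSIONS` flips between the two branches, which is exactly the distinction the one-line WinAdapter.h change relies on.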
--------- Co-authored-by: Ashley Coleman --- CONTRIBUTING.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 233211f150..840b4f0f17 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,10 +40,32 @@ Before submitting a feature or substantial code contribution please discuss it w ### Coding guidelines -The coding, style, and general engineering guidelines follow those described in the docs/CodingStandards.rst. For additional guidelines in code specific to HLSL, see the docs/HLSLChanges.rst file. +The coding, style, and general engineering guidelines follow those described in the [LLVM Coding Standards](docs/CodingStandards.rst). For additional guidelines in code specific to HLSL, see the [HLSL Changes](docs/HLSLChanges.rst) docs. DXC has adopted a clang-format requirement for all incoming changes to C and C++ files. PRs to DXC should have the *changed code* clang formatted to the LLVM style, and leave the remaining portions of the file unchanged. This can be done using the `git-clang-format` tool or IDE driven workflows. A GitHub action will run on all PRs to validate that the change is properly formatted. +#### Applying LLVM Standards + +All new code contributed to DXC should follow the LLVM coding standards. + +Note that the LLVM Coding Standards have a golden rule: + +> **If you are extending, enhancing, or bug fixing already implemented code, use the style that is already being used so that the source is uniform and easy to follow.** + +The golden rule should continue to be applied to places where DXC is self-consistent. A good example is DXC's common use of `PascalCase` instead of `camelCase` for APIs in some parts of the HLSL implementation. In any place where DXC is not self-consistent new code should follow the LLVM Coding Standard. + +A good secondary rule to follow is: + +> **When in doubt, follow LLVM.** + +Adopting LLVM's coding standards provides a consistent set of rules and guidelines to hold all contributions to. This allows patch authors to clearly understand the expectations placed on contributions, and allows reviewers to have a bar to measure contributions against. Aligning with LLVM by default ensures the path of least resistance for everyone. + +Since many of the LLVM Coding Standards are not enforced automatically we rely on code reviews to provide feedback and ensure contributions align with the expected coding standards. Since we rely on reviewers for enforcement and humans make mistakes, please keep in mind: + +> **Code review is a conversation.** + +It is completely reasonable for a patch author to question feedback and provide additional context about why something was done the way it was. Reviewers often see narrow slices in diffs rather than the full context of a file or part of the compiler, so they may not always provide perfect feedback. This is especially true with the application of the "golden rule" since it depends on understanding a wider context. 
+ ### Documenting Pull Requests Pull request descriptions should have the following format: From c9170e5fc5d39d472af1d5e5c2cf368a4501bc1a Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 3 Apr 2025 12:42:15 -0400 Subject: [PATCH 68/88] Update SPIRV-Tools (#7303) Fixes #7181 --- external/SPIRV-Tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 393d5c7df1..4bd1536ed7 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 393d5c7df150532045c50affffea2df22e8231b0 +Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b From 85f34327588ded72e949ed438d85653576f144e4 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Thu, 3 Apr 2025 14:44:09 -0600 Subject: [PATCH 69/88] Fixes non-SPIR-V build, broken by PR #7163 ([SPIRV] Implements vk::BufferPointer proposal) (#7306) #ifdef ENABLE_SPIRV_CODEGEN was omitted in several places. --- include/dxc/dxcapi.internal.h | 5 ++++- tools/clang/lib/Sema/SemaHLSL.cpp | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index f183bb6cf0..d37054194b 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -132,9 +132,12 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, +#ifdef ENABLE_SPIRV_CODEGEN LICOMPTYPE_VK_BUFFER_POINTER = 52, - LICOMPTYPE_COUNT = 53 +#else + LICOMPTYPE_COUNT = 52 +#endif }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index f001cb70d9..f9e011f8d4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -1237,8 +1237,10 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, AR_BASIC_UNKNOWN}; +#ifdef ENABLE_SPIRV_CODEGEN static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, AR_BASIC_UNKNOWN}; +#endif // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. 
const ArBasicKind *g_LegalIntrinsicCompTypes[] = { @@ -1295,7 +1297,9 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT - g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#ifdef ENABLE_SPIRV_CODEGEN + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#endif }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -3587,6 +3591,7 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_HIT_OBJECT: paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); break; +#ifdef ENABLE_SPIRV_CODEGEN case LICOMPTYPE_VK_BUFFER_POINTER: { const ArBasicKind *match = std::find(g_ArBasicKindsAsTypes, @@ -3600,6 +3605,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); break; } +#endif default: DXASSERT(false, "Argument type of intrinsic function is not " "supported"); @@ -4856,7 +4862,10 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: - case AR_OBJECT_VK_BUFFER_POINTER: { +#ifdef ENABLE_SPIRV_CODEGEN + case AR_OBJECT_VK_BUFFER_POINTER: +#endif + { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -5372,8 +5381,10 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; +#ifdef ENABLE_SPIRV_CODEGEN } else if (hlsl::IsVKBufferPointerType(qt)) { return true; +#endif } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -9751,10 +9762,12 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } +#ifdef ENABLE_SPIRV_CODEGEN // Cast vk::BufferPointer to pointer address. if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { return TargetInfo.EltKind == AR_BASIC_UINT64; } +#endif // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || @@ -11604,6 +11617,7 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, bool isStatic) { const Expr *argExpr = CE->getArg(0); @@ -11627,6 +11641,7 @@ static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, return false; } +#endif // Check HLSL call constraints, not fatal to creating the AST. 
void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
@@ -11646,12 +11661,14 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
   case hlsl::IntrinsicOp::IOP_Barrier:
     CheckBarrierCall(*this, FDecl, TheCall);
     break;
+#ifdef ENABLE_SPIRV_CODEGEN
   case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast:
     CheckVKBufferPointerCast(*this, FDecl, TheCall, false);
     break;
   case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast:
     CheckVKBufferPointerCast(*this, FDecl, TheCall, true);
     break;
+#endif
   default:
     break;
   }

From e50f599ff302a0ecf08146f6986c738dc4149abb Mon Sep 17 00:00:00 2001
From: Greg Roth
Date: Fri, 4 Apr 2025 09:44:57 -0700
Subject: [PATCH 70/88] [NFC] Standardize DxilValidation variable capitalization (#7307)

Capitalize all the variables and rename a few in DxilValidation.cpp in
keeping with
https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly

Because this file could easily be mistaken for one covered by the golden rule:
https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly
it is at serious risk of receiving changes that get hung up on requirements to
follow the LLVM coding guidelines. This change brings the cases where variable
capitalization was not in line with the coding standards into conformance, to
avoid such pitfalls in the future.
---
 lib/DxilValidation/DxilValidation.cpp | 3288 ++++++++++++-------------
 1 file changed, 1644 insertions(+), 1644 deletions(-)

diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index cac074adc3..97bde6ca24 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -65,8 +65,8 @@ using std::vector;
 namespace hlsl {

 // PrintDiagnosticContext methods.
-PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &printer) - : m_Printer(printer), m_errorsFound(false), m_warningsFound(false) {} +PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &Printer) + : m_Printer(Printer), m_errorsFound(false), m_warningsFound(false) {} bool PrintDiagnosticContext::HasErrors() const { return m_errorsFound; } bool PrintDiagnosticContext::HasWarnings() const { return m_warningsFound; } @@ -97,68 +97,68 @@ struct PSExecutionInfo { }; static unsigned ValidateSignatureRowCol(Instruction *I, - DxilSignatureElement &SE, Value *rowVal, - Value *colVal, EntryStatus &Status, + DxilSignatureElement &SE, Value *RowVal, + Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (ConstantInt *constRow = dyn_cast(rowVal)) { - unsigned row = constRow->getLimitedValue(); - if (row >= SE.GetRows()) { - std::string range = std::string("0~") + std::to_string(SE.GetRows()); + if (ConstantInt *ConstRow = dyn_cast(RowVal)) { + unsigned Row = ConstRow->getLimitedValue(); + if (Row >= SE.GetRows()) { + std::string Range = std::string("0~") + std::to_string(SE.GetRows()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Row", range, std::to_string(row)}); + {"Row", Range, std::to_string(Row)}); } } - if (!isa(colVal)) { - // col must be const + if (!isa(ColVal)) { + // Col must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"Col", "LoadInput/StoreOutput"}); return 0; } - unsigned col = cast(colVal)->getLimitedValue(); + unsigned Col = cast(ColVal)->getLimitedValue(); - if (col > SE.GetCols()) { - std::string range = std::string("0~") + std::to_string(SE.GetCols()); + if (Col > SE.GetCols()) { + std::string Range = std::string("0~") + std::to_string(SE.GetCols()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Col", range, std::to_string(col)}); + {"Col", Range, std::to_string(Col)}); } else { if (SE.IsOutput()) - Status.outputCols[SE.GetID()] |= 1 << col; + Status.outputCols[SE.GetID()] |= 1 << Col; if (SE.IsPatchConstOrPrim()) - Status.patchConstOrPrimCols[SE.GetID()] |= 1 << col; + Status.patchConstOrPrimCols[SE.GetID()] |= 1 << Col; } - return col; + return Col; } static DxilSignatureElement * -ValidateSignatureAccess(Instruction *I, DxilSignature &sig, Value *sigID, - Value *rowVal, Value *colVal, EntryStatus &Status, +ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, + Value *RowVal, Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (!isa(sigID)) { + if (!isa(SigId)) { // inputID must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"SignatureID", "LoadInput/StoreOutput"}); return nullptr; } - unsigned SEIdx = cast(sigID)->getLimitedValue(); - if (sig.GetElements().size() <= SEIdx) { + unsigned SEIdx = cast(SigId)->getLimitedValue(); + if (Sig.GetElements().size() <= SEIdx) { ValCtx.EmitInstrError(I, ValidationRule::InstrOpConstRange); return nullptr; } - DxilSignatureElement &SE = sig.GetElement(SEIdx); - bool isOutput = sig.IsOutput(); + DxilSignatureElement &SE = Sig.GetElement(SEIdx); + bool IsOutput = Sig.IsOutput(); - unsigned col = ValidateSignatureRowCol(I, SE, rowVal, colVal, Status, ValCtx); + unsigned Col = ValidateSignatureRowCol(I, SE, RowVal, ColVal, Status, ValCtx); - if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { - unsigned mask = Status.OutputPositionMask[SE.GetOutputStream()]; - mask |= 1 << col; + if (IsOutput && SE.GetSemantic()->GetKind() == 
DXIL::SemanticKind::Position) { + unsigned Mask = Status.OutputPositionMask[SE.GetOutputStream()]; + Mask |= 1 << Col; if (SE.GetOutputStream() < DXIL::kNumOutputStreams) - Status.OutputPositionMask[SE.GetOutputStream()] = mask; + Status.OutputPositionMask[SE.GetOutputStream()] = Mask; } return &SE; } @@ -183,9 +183,9 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, return RP; } -static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, +static DXIL::SamplerKind GetSamplerKind(Value *SamplerHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(samplerHandle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(SamplerHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::Sampler) { // must be sampler. @@ -200,14 +200,14 @@ static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, } static DXIL::ResourceKind -GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, +GetResourceKindAndCompTy(Value *Handle, DXIL::ComponentType &CompTy, DXIL::ResourceClass &ResClass, ValidationContext &ValCtx) { CompTy = DXIL::ComponentType::Invalid; ResClass = DXIL::ResourceClass::Invalid; // TODO: validate ROV is used only in PS. - DxilResourceProperties RP = GetResourceFromHandle(handle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(Handle, ValCtx); ResClass = RP.getResourceClass(); switch (ResClass) { @@ -230,19 +230,19 @@ GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, return RP.getResourceKind(); } -DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, - std::deque &offsets) { +DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &TypeSys, + std::deque &Offsets) { unsigned CurIdx = 1; - unsigned LastIdx = offsets.size() - 1; + unsigned LastIdx = Offsets.size() - 1; DxilStructAnnotation *StructAnnot = nullptr; - for (; CurIdx < offsets.size(); ++CurIdx) { + for (; CurIdx < Offsets.size(); ++CurIdx) { if (const StructType *EltST = dyn_cast(Ty)) { - if (DxilStructAnnotation *EltAnnot = typeSys.GetStructAnnotation(EltST)) { + if (DxilStructAnnotation *EltAnnot = TypeSys.GetStructAnnotation(EltST)) { StructAnnot = EltAnnot; - Ty = EltST->getElementType(offsets[CurIdx]); + Ty = EltST->getElementType(Offsets[CurIdx]); if (CurIdx == LastIdx) { - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } else { return nullptr; @@ -252,16 +252,16 @@ DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, StructAnnot = nullptr; } else { if (StructAnnot) - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } return nullptr; } -DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { - auto it = ResPropMap.find(resVal); - if (it != ResPropMap.end()) { - return it->second; +DxilResourceProperties ValidationContext::GetResourceFromVal(Value *ResVal) { + auto It = ResPropMap.find(ResVal); + if (It != ResPropMap.end()) { + return It->second; } else { DxilResourceProperties RP; return RP; @@ -269,34 +269,34 @@ DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { } struct ResRetUsage { - bool x; - bool y; - bool z; - bool w; - bool status; - ResRetUsage() : x(false), y(false), z(false), w(false), status(false) {} + bool X; + bool Y; + bool Z; + bool W; + bool Status; + ResRetUsage() : X(false), Y(false), Z(false), W(false), Status(false) {} }; -static void 
CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, +static void CollectGetDimResRetUsage(ResRetUsage &Usage, Instruction *ResRet, ValidationContext &ValCtx) { for (User *U : ResRet->users()) { if (ExtractValueInst *EVI = dyn_cast(U)) { - for (unsigned idx : EVI->getIndices()) { - switch (idx) { + for (unsigned Idx : EVI->getIndices()) { + switch (Idx) { case 0: - usage.x = true; + Usage.X = true; break; case 1: - usage.y = true; + Usage.Y = true; break; case 2: - usage.z = true; + Usage.Z = true; break; case 3: - usage.w = true; + Usage.W = true; break; case DXIL::kResRetStatusIndex: - usage.status = true; + Usage.Status = true; break; default: // Emit index out of bound. @@ -306,7 +306,7 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } } else if (PHINode *PHI = dyn_cast(U)) { - CollectGetDimResRetUsage(usage, PHI, ValCtx); + CollectGetDimResRetUsage(Usage, PHI, ValCtx); } else { Instruction *User = cast(U); ValCtx.EmitInstrError(User, ValidationRule::InstrDxilStructUser); @@ -314,18 +314,18 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } -static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef coords, +static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { - const unsigned kMaxNumCoords = 4; - unsigned numCoords = DxilResource::GetNumCoords(resKind); - for (unsigned i = 0; i < kMaxNumCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + const unsigned KMaxNumCoords = 4; + unsigned NumCoords = DxilResource::GetNumCoords(ResKind); + for (unsigned I = 0; I < KMaxNumCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -334,18 +334,18 @@ static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, } static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, - DXIL::ResourceKind resKind, - ArrayRef coords, + DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { const unsigned kMaxNumDimCoords = 3; - unsigned numCoords = DxilResource::GetNumDimensionsForCalcLOD(resKind); - for (unsigned i = 0; i < kMaxNumDimCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + unsigned NumCoords = DxilResource::GetNumDimensionsForCalcLOD(ResKind); + for (unsigned I = 0; I < kMaxNumDimCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -353,21 +353,21 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, } } -static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef offsets, +static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Offsets, ValidationContext &ValCtx) { const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - unsigned numOffsets = DxilResource::GetNumOffsets(resKind); - bool hasOffset = !isa(offsets[0]); + unsigned NumOffsets = DxilResource::GetNumOffsets(ResKind); + bool HasOffset = !isa(Offsets[0]); - auto validateOffset = [&](Value *offset) { + auto ValidateOffset = [&](Value *Offset) { // 6.7 Advanced Textures allow programmable offsets 
if (pSM->IsSM67Plus()) return; - if (ConstantInt *cOffset = dyn_cast(offset)) { - int offset = cOffset->getValue().getSExtValue(); - if (offset > 7 || offset < -8) { + if (ConstantInt *cOffset = dyn_cast(Offset)) { + int Offset = cOffset->getValue().getSExtValue(); + if (Offset > 7 || Offset < -8) { ValCtx.EmitInstrError(CI, ValidationRule::InstrTextureOffset); } } else { @@ -375,20 +375,20 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, } }; - if (hasOffset) { - validateOffset(offsets[0]); + if (HasOffset) { + ValidateOffset(Offsets[0]); } - for (unsigned i = 1; i < offsets.size(); i++) { - if (i < numOffsets) { - if (hasOffset) { - if (isa(offsets[i])) + for (unsigned I = 1; I < Offsets.size(); I++) { + if (I < NumOffsets) { + if (HasOffset) { + if (isa(Offsets[I])) ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); else - validateOffset(offsets[i]); + ValidateOffset(Offsets[I]); } } else { - if (!isa(offsets[i])) { + if (!isa(Offsets[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } @@ -405,53 +405,53 @@ static void ValidateDerivativeOp(CallInst *CI, ValidationContext &ValCtx) { {"Derivatives in CS/MS/AS", "Shader Model 6.6+"}); } -static void ValidateSampleInst(CallInst *CI, Value *srvHandle, - Value *samplerHandle, ArrayRef coords, - ArrayRef offsets, bool IsSampleC, +static void ValidateSampleInst(CallInst *CI, Value *SrvHandle, + Value *SamplerHandle, ArrayRef Coords, + ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); - bool isSampleCompTy = compTy == DXIL::ComponentType::F32; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::F16; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF16; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF16; + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); + bool IsSampleCompTy = CompTy == DXIL::ComponentType::F32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::F16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF16; const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); if (pSM->IsSM67Plus() && !IsSampleC) { - isSampleCompTy |= compTy == DXIL::ComponentType::I16; - isSampleCompTy |= compTy == DXIL::ComponentType::U16; - isSampleCompTy |= compTy == DXIL::ComponentType::I32; - isSampleCompTy |= compTy == DXIL::ComponentType::U32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I32; + IsSampleCompTy |= CompTy == 
DXIL::ComponentType::U32; } - if (!isSampleCompTy) { + if (!IsSampleCompTy) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleCompType); } - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); } - ValidationRule rule = ValidationRule::InstrResourceKindForSample; + ValidationRule Rule = ValidationRule::InstrResourceKindForSample; if (IsSampleC) { - rule = ValidationRule::InstrResourceKindForSampleC; + Rule = ValidationRule::InstrResourceKindForSampleC; } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -461,64 +461,64 @@ static void ValidateSampleInst(CallInst *CI, Value *srvHandle, break; case DXIL::ResourceKind::Texture3D: if (IsSampleC) { - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); } break; default: - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. - ValidateResourceOffset(CI, resKind, offsets, ValCtx); + ValidateResourceOffset(CI, ResKind, Offsets, ValCtx); } -static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, - ArrayRef coords, ArrayRef offsets, +static void ValidateGather(CallInst *CI, Value *SrvHandle, Value *SamplerHandle, + ArrayRef Coords, ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture2D: case DXIL::ResourceKind::Texture2DArray: { - bool hasOffset = !isa(offsets[0]); - if (hasOffset) { - if (isa(offsets[1])) { + bool HasOffset = !isa(Offsets[0]); + if (HasOffset) { + if (isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); } } } break; case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: { - if (!isa(offsets[0])) { + if (!isa(Offsets[0])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } - if (!isa(offsets[1])) { + if (!isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } break; @@ -529,21 +529,21 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, } } -static unsigned StoreValueToMask(ArrayRef vals) { - unsigned mask = 0; - for (unsigned i = 0; i < 4; i++) { - if (!isa(vals[i])) { - mask |= 1 << i; +static unsigned StoreValueToMask(ArrayRef Vals) { + unsigned Mask = 0; + for (unsigned I = 0; I < 4; I++) { + if (!isa(Vals[I])) { + Mask |= 1 << I; } } - return mask; + return Mask; } -static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(cbHandle, ValCtx); +static int GetCBufSize(Value *CbHandle, ValidationContext &ValCtx) { + DxilResourceProperties RP = GetResourceFromHandle(CbHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::CBuffer) { - ValCtx.EmitInstrError(cast(cbHandle), + ValCtx.EmitInstrError(cast(CbHandle), ValidationRule::InstrCBufferClassForCBufferHandle); return -1; } @@ -554,7 +554,7 @@ static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { // Make sure none of the handle arguments are undef / zero-initializer, // Also, do not accept any resource handles with invalid dxil resource // properties -void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { for (Value *op : CI->operands()) { @@ -563,13 +563,13 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, const Type *pNodeRecordHandleTy = ValCtx.DxilMod.GetOP()->GetNodeRecordHandleType(); - const Type *argTy = op->getType(); - if (argTy == pNodeHandleTy || argTy == pNodeRecordHandleTy || - argTy == pHandleTy) { + const Type *ArgTy = op->getType(); + if (ArgTy == pNodeHandleTy || ArgTy == pNodeRecordHandleTy || + ArgTy == pHandleTy) { if (isa(op) || isa(op)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); - } else if (argTy == pHandleTy) { + } else if (ArgTy == pHandleTy) { // GetResourceFromHandle will emit an error on an invalid handle GetResourceFromHandle(op, ValCtx); } @@ -577,10 +577,10 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, } } -void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgs(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // TODO: add case DXIL::OpCode::IndexNodeRecordHandle: case DXIL::OpCode::AnnotateHandle: @@ -591,12 +591,12 @@ void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, break; default: - ValidateHandleArgsForInstruction(CI, opcode, ValCtx); + ValidateHandleArgsForInstruction(CI, Opcode, ValCtx); break; } } -static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { +static unsigned GetNumVertices(DXIL::InputPrimitive InputPrimitive) { const unsigned 
InputPrimitiveVertexTab[] = { 0, // Undefined = 0, 1, // Point = 1, @@ -641,26 +641,26 @@ static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { 0, // LastEntry, }; - unsigned primitiveIdx = static_cast(inputPrimitive); - return InputPrimitiveVertexTab[primitiveIdx]; + unsigned PrimitiveIdx = static_cast(InputPrimitive); + return InputPrimitiveVertexTab[PrimitiveIdx]; } -static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { Function *F = CI->getParent()->getParent(); DxilModule &DM = ValCtx.DxilMod; - bool bIsPatchConstantFunc = false; + bool IsPatchConstantFunc = false; if (!DM.HasDxilEntryProps(F)) { - auto it = ValCtx.PatchConstantFuncMap.find(F); - if (it == ValCtx.PatchConstantFuncMap.end()) { + auto It = ValCtx.PatchConstantFuncMap.find(F); + if (It == ValCtx.PatchConstantFuncMap.end()) { // Missing entry props. ValCtx.EmitInstrError(CI, ValidationRule::InstrSignatureOperationNotInEntry); return; } // Use hull entry instead of patch constant function. - F = it->second.front(); - bIsPatchConstantFunc = true; + F = It->second.front(); + IsPatchConstantFunc = true; } if (!ValCtx.HasEntryStatus(F)) { return; @@ -668,67 +668,67 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); - DxilFunctionProps &props = EntryProps.props; + DxilFunctionProps &Props = EntryProps.props; DxilEntrySignature &S = EntryProps.sig; - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::LoadInput: { - Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); - ValidateSignatureAccess(CI, inputSig, inputID, row, col, Status, ValCtx); - - // Check vertexID in ps/vs. and none array input. - Value *vertexID = + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + ValidateSignatureAccess(CI, InputSig, InputId, Row, Col, Status, ValCtx); + + // Check VertexId in ps/vs. and none array input. + Value *VertexId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputVertexIDOpIdx); - bool usedVertexID = vertexID && !isa(vertexID); - if (props.IsVS() || props.IsPS()) { - if (usedVertexID) { - // use vertexID in VS/PS input. + bool UsedVertexId = VertexId && !isa(VertexId); + if (Props.IsVS() || Props.IsPS()) { + if (UsedVertexId) { + // Use VertexId in VS/PS input. 
ValCtx.EmitInstrError(CI, ValidationRule::SmOperand); return; } } else { - if (ConstantInt *cVertexID = dyn_cast(vertexID)) { - int immVertexID = cVertexID->getValue().getLimitedValue(); - if (cVertexID->getValue().isNegative()) { - immVertexID = cVertexID->getValue().getSExtValue(); + if (ConstantInt *cVertexId = dyn_cast(VertexId)) { + int ImmVertexId = cVertexId->getValue().getLimitedValue(); + if (cVertexId->getValue().isNegative()) { + ImmVertexId = cVertexId->getValue().getSExtValue(); } - const int low = 0; - int high = 0; - if (props.IsGS()) { - DXIL::InputPrimitive inputPrimitive = - props.ShaderProps.GS.inputPrimitive; - high = GetNumVertices(inputPrimitive); - } else if (props.IsDS()) { - high = props.ShaderProps.DS.inputControlPoints; - } else if (props.IsHS()) { - high = props.ShaderProps.HS.inputControlPoints; + const int Low = 0; + int High = 0; + if (Props.IsGS()) { + DXIL::InputPrimitive InputPrimitive = + Props.ShaderProps.GS.inputPrimitive; + High = GetNumVertices(InputPrimitive); + } else if (Props.IsDS()) { + High = Props.ShaderProps.DS.inputControlPoints; + } else if (Props.IsHS()) { + High = Props.ShaderProps.HS.inputControlPoints; } else { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadInput", "VS/HS/DS/GS/PS"}); } - if (immVertexID < low || immVertexID >= high) { - std::string range = std::to_string(low) + "~" + std::to_string(high); + if (ImmVertexId < Low || ImmVertexId >= High) { + std::string Range = std::to_string(Low) + "~" + std::to_string(High); ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"VertexID", range, std::to_string(immVertexID)}); + {"VertexID", Range, std::to_string(ImmVertexId)}); } } } } break; case DXIL::OpCode::DomainLocation: { - Value *colValue = + Value *ColValue = CI->getArgOperand(DXIL::OperandIndex::kDomainLocationColOpIdx); - if (!isa(colValue)) { - // col must be const + if (!isa(ColValue)) { + // Col must be const ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Col", "DomainLocation"}); } else { - unsigned col = cast(colValue)->getLimitedValue(); - if (col >= Status.domainLocSize) { + unsigned Col = cast(ColValue)->getLimitedValue(); + if (Col >= Status.domainLocSize) { ValCtx.EmitInstrError(CI, ValidationRule::SmDomainLocationIdxOOB); } } @@ -736,60 +736,60 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::StoreOutput: case DXIL::OpCode::StoreVertexOutput: case DXIL::OpCode::StorePrimitiveOutput: { - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = opcode == DXIL::OpCode::StorePrimitiveOutput + DxilSignature &OutputSig = Opcode == DXIL::OpCode::StorePrimitiveOutput ? S.PatchConstOrPrimSignature : S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::OutputControlPointID: { // Only used in hull shader. - Function *func = CI->getParent()->getParent(); + Function *Func = CI->getParent()->getParent(); // Make sure this is inside hs shader entry function. 
- if (!(props.IsHS() && F == func)) { + if (!(Props.IsHS() && F == Func)) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"OutputControlPointID", "hull function"}); } } break; case DXIL::OpCode::LoadOutputControlPoint: { // Only used in patch constant function. - Function *func = CI->getParent()->getParent(); - if (ValCtx.entryFuncCallSet.count(func) > 0) { + Function *Func = CI->getParent()->getParent(); + if (ValCtx.entryFuncCallSet.count(Func) > 0) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadOutputControlPoint", "PatchConstant function"}); } - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + DxilSignature &OutputSig = S.OutputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::StorePatchConstant: { // Only used in patch constant function. - Function *func = CI->getParent()->getParent(); - if (!bIsPatchConstantFunc) { + Function *Func = CI->getParent()->getParent(); + if (!IsPatchConstantFunc) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"StorePatchConstant", "PatchConstant function"}); } else { - auto &hullShaders = ValCtx.PatchConstantFuncMap[func]; - for (Function *F : hullShaders) { + auto &HullShaders = ValCtx.PatchConstantFuncMap[Func]; + for (Function *F : HullShaders) { EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); DxilEntrySignature &S = EntryProps.sig; - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.PatchConstOrPrimSignature; - Value *row = + DxilSignature &OutputSig = S.PatchConstOrPrimSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } } @@ -807,12 +807,12 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::EvalSampleIndex: case DXIL::OpCode::EvalSnapped: { // Eval* share same operand index with load input. 
- Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, inputID, row, col, Status, ValCtx); + CI, InputSig, InputId, Row, Col, Status, ValCtx); if (pSE) { switch (pSE->GetInterpolationMode()->GetKind()) { case DXIL::InterpolationMode::Linear: @@ -836,11 +836,11 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::AttributeAtVertex: { Value *Attribute = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, Attribute, row, col, Status, ValCtx); + CI, InputSig, Attribute, Row, Col, Status, ValCtx); if (pSE && pSE->GetInterpolationMode()->GetKind() != hlsl::InterpolationMode::Kind::Constant) { ValCtx.EmitInstrFormatError( @@ -851,35 +851,35 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::CutStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::EmitStream: { - if (props.IsGS()) { - auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + if (Props.IsGS()) { + auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - Value *streamID = + Value *StreamId = CI->getArgOperand(DXIL::OperandIndex::kStreamEmitCutIDOpIdx); - if (ConstantInt *cStreamID = dyn_cast(streamID)) { - int immStreamID = cStreamID->getValue().getLimitedValue(); - if (cStreamID->getValue().isNegative() || immStreamID >= 4) { + if (ConstantInt *cStreamId = dyn_cast(StreamId)) { + int ImmStreamId = cStreamId->getValue().getLimitedValue(); + if (cStreamId->getValue().isNegative() || ImmStreamId >= 4) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", "0~4", std::to_string(immStreamID)}); + {"StreamID", "0~4", std::to_string(ImmStreamId)}); } else { - unsigned immMask = 1 << immStreamID; - if ((streamMask & immMask) == 0) { - std::string range; - for (unsigned i = 0; i < 4; i++) { - if (streamMask & (1 << i)) { - range += std::to_string(i) + " "; + unsigned ImmMask = 1 << ImmStreamId; + if ((StreamMask & ImmMask) == 0) { + std::string Range; + for (unsigned I = 0; I < 4; I++) { + if (StreamMask & (1 << I)) { + Range += std::to_string(I) + " "; } } ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", range, std::to_string(immStreamID)}); + {"StreamID", 
Range, std::to_string(ImmStreamId)}); } } @@ -893,25 +893,25 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::EmitIndices: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"EmitIndices", "Mesh shader"}); } } break; case DXIL::OpCode::SetMeshOutputCounts: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"SetMeshOutputCounts", "Mesh shader"}); } } break; case DXIL::OpCode::GetMeshPayload: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"GetMeshPayload", "Mesh shader"}); } } break; case DXIL::OpCode::DispatchMesh: { - if (!props.IsAS()) { + if (!Props.IsAS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"DispatchMesh", "Amplification shader"}); } @@ -925,9 +925,9 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } -static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: { DxilInst_Asin I(CI); @@ -973,77 +973,77 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation -static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode opcode, - ConstantInt *mask, unsigned stValMask, - bool isTyped, ValidationContext &ValCtx) { - if (!mask) { +static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, + ConstantInt *Mask, unsigned StValMask, + bool IsTyped, ValidationContext &ValCtx) { + if (!Mask) { // Mask for buffer store should be immediate. ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, - {"Mask", hlsl::OP::GetOpCodeName(opcode)}); + {"Mask", hlsl::OP::GetOpCodeName(Opcode)}); return false; } - unsigned uMask = mask->getLimitedValue(); - if (isTyped && uMask != 0xf) { + unsigned UMask = Mask->getLimitedValue(); + if (IsTyped && UMask != 0xf) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskForTypedUAVStore); } // write mask must be contiguous (.x .xy .xyz or .xyzw) - if (!((uMask == 0xf) || (uMask == 0x7) || (uMask == 0x3) || (uMask == 0x1))) { + if (!((UMask == 0xf) || (UMask == 0x7) || (UMask == 0x3) || (UMask == 0x1))) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskGapForUAV); } - // If a bit is set in the uMask (expected values) that isn't set in stValMask + // If a bit is set in the UMask (expected values) that isn't set in StValMask // (user provided values) then the user failed to define some of the output // values. 
- if (uMask & ~stValMask) + if (UMask & ~StValMask) ValCtx.EmitInstrError(I, ValidationRule::InstrUndefinedValueForUAVStore); - else if (uMask != stValMask) + else if (UMask != StValMask) ValCtx.EmitInstrFormatError( I, ValidationRule::InstrWriteMaskMatchValueForUAVStore, - {std::to_string(uMask), std::to_string(stValMask)}); + {std::to_string(UMask), std::to_string(StValMask)}); return true; } -static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::GetDimensions: { - DxilInst_GetDimensions getDim(CI); - Value *handle = getDim.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); + DxilInst_GetDimensions GetDim(CI); + Value *Handle = GetDim.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); // Check the result component use. - ResRetUsage usage; - CollectGetDimResRetUsage(usage, CI, ValCtx); + ResRetUsage Usage; + CollectGetDimResRetUsage(Usage, CI, ValCtx); // Mip level only for texture. - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: - if (usage.y) { + if (Usage.Y) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"y", "Texture1D"}); } - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1D"}); } break; case DXIL::ResourceKind::Texture1DArray: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1DArray"}); } break; case DXIL::ResourceKind::Texture2D: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2D"}); @@ -1052,7 +1052,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture2DArray: break; case DXIL::ResourceKind::Texture2DMS: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2DMS"}); @@ -1063,7 +1063,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture3D: break; case DXIL::ResourceKind::TextureCube: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "TextureCube"}); @@ -1075,12 +1075,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: { - Value *mip = getDim.get_mipLevel(); - if (!isa(mip)) { + Value *Mip = GetDim.get_mipLevel(); + if (!isa(Mip)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrMipLevelForGetDimension); } - if (resKind != DXIL::ResourceKind::Invalid) { - if (usage.y || usage.z || usage.w) { + if (ResKind != DXIL::ResourceKind::Invalid) { + if (Usage.Y || Usage.Z || Usage.W) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); @@ -1092,38 +1092,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; } - if (usage.status) { + if (Usage.Status) { ValCtx.EmitInstrFormatError( CI, 
ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); } } break; case DXIL::OpCode::CalculateLOD: { - DxilInst_CalculateLOD lod(CI); - Value *samplerHandle = lod.get_sampler(); - DXIL::SamplerKind samplerKind = GetSamplerKind(samplerHandle, ValCtx); - if (samplerKind != DXIL::SamplerKind::Default) { + DxilInst_CalculateLOD LOD(CI); + Value *SamplerHandle = LOD.get_sampler(); + DXIL::SamplerKind SamplerKind = GetSamplerKind(SamplerHandle, ValCtx); + if (SamplerKind != DXIL::SamplerKind::Default) { // After SM68, Comparison is supported. if (!ValCtx.DxilMod.GetShaderModel()->IsSM68Plus() || - samplerKind != DXIL::SamplerKind::Comparison) + SamplerKind != DXIL::SamplerKind::Comparison) ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForLOD); } - Value *handle = lod.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + Value *Handle = LOD.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource. ValidateCalcLODResourceDimensionCoord( - CI, resKind, {lod.get_coord0(), lod.get_coord1(), lod.get_coord2()}, + CI, ResKind, {LOD.get_coord0(), LOD.get_coord1(), LOD.get_coord2()}, ValCtx); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1140,67 +1140,67 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::TextureGather: { - DxilInst_TextureGather gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGather Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::TextureGatherCmp: { - DxilInst_TextureGatherCmp gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGatherCmp Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::Sample: { - DxilInst_Sample sample(CI); + DxilInst_Sample Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case 
DXIL::OpCode::SampleCmp: { - DxilInst_SampleCmp sample(CI); + DxilInst_SampleCmp Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpLevel: { // sampler must be comparison mode. - DxilInst_SampleCmpLevel sample(CI); + DxilInst_SampleCmpLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleCmpLevelZero: { // sampler must be comparison mode. - DxilInst_SampleCmpLevelZero sample(CI); + DxilInst_SampleCmpLevelZero Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleBias: { - DxilInst_SampleBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1210,19 +1210,19 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpBias: { - DxilInst_SampleCmpBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleCmpBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || 
FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1232,38 +1232,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleGrad: { - DxilInst_SampleGrad sample(CI); + DxilInst_SampleGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::SampleCmpGrad: { - DxilInst_SampleCmpGrad sample(CI); + DxilInst_SampleCmpGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleLevel: { - DxilInst_SampleLevel sample(CI); + DxilInst_SampleLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::CheckAccessFullyMapped: { @@ -1273,53 +1273,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); - bool isLegal = EVI->getNumIndices() == 1 && + bool IsLegal = EVI->getNumIndices() == 1 && EVI->getIndices()[0] == DXIL::kResRetStatusIndex && ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); - if (!isLegal) { + if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } } } break; case DXIL::OpCode::BufferStore: { - DxilInst_BufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_BufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != 
DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, - resKind == DXIL::ResourceKind::TypedBuffer || - resKind == DXIL::ResourceKind::TBuffer, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, + ResKind == DXIL::ResourceKind::TypedBuffer || + ResKind == DXIL::ResourceKind::TBuffer, ValCtx)) return; - Value *offset = bufSt.get_coord1(); + Value *Offset = BufSt.get_coord1(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1332,26 +1332,26 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureStore: { - DxilInst_TextureStore texSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texSt.get_srv(), compTy, resClass, ValCtx); + DxilInst_TextureStore TexSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexSt.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(texSt.get_mask()); - unsigned stValMask = - StoreValueToMask({texSt.get_value0(), texSt.get_value1(), - texSt.get_value2(), texSt.get_value3()}); + ConstantInt *Mask = dyn_cast(TexSt.get_mask()); + unsigned StValMask = + StoreValueToMask({TexSt.get_value0(), TexSt.get_value1(), + TexSt.get_value2(), TexSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, true /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, true /*IsTyped*/, ValCtx)) return; - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1367,30 +1367,30 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::BufferLoad: { - DxilInst_BufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + DxilInst_BufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, 
ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_wot(); + Value *Offset = BufLd.get_wot(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1403,33 +1403,33 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureLoad: { - DxilInst_TextureLoad texLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texLd.get_srv(), compTy, resClass, ValCtx); - - Value *mipLevel = texLd.get_mipLevelOrSampleCount(); - - if (resClass == DXIL::ResourceClass::UAV) { - bool noOffset = isa(texLd.get_offset0()); - noOffset &= isa(texLd.get_offset1()); - noOffset &= isa(texLd.get_offset2()); - if (!noOffset) { + DxilInst_TextureLoad TexLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexLd.get_srv(), CompTy, ResClass, ValCtx); + + Value *MipLevel = TexLd.get_mipLevelOrSampleCount(); + + if (ResClass == DXIL::ResourceClass::UAV) { + bool NoOffset = isa(TexLd.get_offset0()); + NoOffset &= isa(TexLd.get_offset1()); + NoOffset &= isa(TexLd.get_offset2()); + if (!NoOffset) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOffsetOnUAVLoad); } - if (!isa(mipLevel)) { - if (resKind != DXIL::ResourceKind::Texture2DMS && - resKind != DXIL::ResourceKind::Texture2DMSArray) + if (!isa(MipLevel)) { + if (ResKind != DXIL::ResourceKind::Texture2DMS && + ResKind != DXIL::ResourceKind::Texture2DMSArray) ValCtx.EmitInstrError(CI, ValidationRule::InstrMipOnUAVLoad); } } else { - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1438,7 +1438,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, break; case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: { - if (isa(mipLevel)) { + if (isa(MipLevel)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleIndexForLoad2DMS); } } break; @@ -1449,28 +1449,28 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateResourceOffset( - CI, resKind, - {texLd.get_offset0(), texLd.get_offset1(), texLd.get_offset2()}, + CI, ResKind, + {TexLd.get_offset0(), TexLd.get_offset1(), TexLd.get_offset2()}, ValCtx); } break; case DXIL::OpCode::CBufferLoad: { DxilInst_CBufferLoad CBLoad(CI); - Value *regIndex = CBLoad.get_byteOffset(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue(); - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_byteOffset(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue(); + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; case 
DXIL::OpCode::CBufferLoadLegacy: { DxilInst_CBufferLoadLegacy CBLoad(CI); - Value *regIndex = CBLoad.get_regIndex(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue() * 16; // 16 bytes align - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_regIndex(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue() * 16; // 16 bytes align + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } @@ -1483,35 +1483,35 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); } } - DxilInst_RawBufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); + DxilInst_RawBufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_elementOffset(); - Value *align = bufLd.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { + Value *Offset = BufLd.get_elementOffset(); + Value *Align = BufLd.get_alignment(); + unsigned AlignSize = 0; + if (!isa(Align)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } else { - alignSize = bufLd.get_alignment_val(); + AlignSize = BufLd.get_alignment_val(); } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1530,43 +1530,43 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); } } - DxilInst_RawBufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_RawBufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, false /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; - Value *offset = 
bufSt.get_elementOffset(); - Value *align = bufSt.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { + Value *Offset = BufSt.get_elementOffset(); + Value *Align = BufSt.get_alignment(); + unsigned AlignSize = 0; + if (!isa(Align)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } else { - alignSize = bufSt.get_alignment_val(); + AlignSize = BufSt.get_alignment_val(); } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1578,9 +1578,9 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::TraceRay: { - DxilInst_TraceRay traceRay(CI); - Value *hdl = traceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(hdl); + DxilInst_TraceRay TraceRay(CI); + Value *Hdl = TraceRay.get_AccelerationStructure(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); return; @@ -1595,12 +1595,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } static void ValidateBarrierFlagArg(ValidationContext &ValCtx, CallInst *CI, - Value *Arg, unsigned validMask, - StringRef flagName, StringRef opName) { + Value *Arg, unsigned ValidMask, + StringRef FlagName, StringRef OpName) { if (ConstantInt *CArg = dyn_cast(Arg)) { - if ((CArg->getLimitedValue() & (uint32_t)(~validMask)) != 0) { + if ((CArg->getLimitedValue() & (uint32_t)(~ValidMask)) != 0) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrBarrierFlagInvalid, - {flagName, opName}); + {FlagName, OpName}); } } else { ValCtx.EmitInstrError(CI, @@ -1622,35 +1622,35 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } static void ValidateDxilOperationCallInProfile(CallInst *CI, - DXIL::OpCode opcode, + DXIL::OpCode Opcode, const ShaderModel *pSM, ValidationContext &ValCtx) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = pSM ? 
pSM->GetKind() : DXIL::ShaderKind::Invalid; llvm::Function *F = CI->getParent()->getParent(); - DXIL::NodeLaunchType nodeLaunchType = DXIL::NodeLaunchType::Invalid; - if (DXIL::ShaderKind::Library == shaderKind) { + DXIL::NodeLaunchType NodeLaunchType = DXIL::NodeLaunchType::Invalid; + if (DXIL::ShaderKind::Library == ShaderKind) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DxilEntryProps &entryProps = ValCtx.DxilMod.GetDxilEntryProps(F); - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind == DXIL::ShaderKind::Node) - nodeLaunchType = entryProps.props.Node.LaunchType; + DxilEntryProps &EntryProps = ValCtx.DxilMod.GetDxilEntryProps(F); + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; + if (ShaderKind == DXIL::ShaderKind::Node) + NodeLaunchType = EntryProps.props.Node.LaunchType; } else if (ValCtx.DxilMod.IsPatchConstantShader(F)) - shaderKind = DXIL::ShaderKind::Hull; + ShaderKind = DXIL::ShaderKind::Hull; } // These shader models are treated like compute - bool isCSLike = shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Node; + bool IsCSLike = ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Node; // Is called from a library function - bool isLibFunc = shaderKind == DXIL::ShaderKind::Library; + bool IsLibFunc = ShaderKind == DXIL::ShaderKind::Library; - ValidateHandleArgs(CI, opcode, ValCtx); + ValidateHandleArgs(CI, Opcode, ValCtx); - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: case DXIL::OpCode::Acos: @@ -1659,7 +1659,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::DerivFineY: case DXIL::OpCode::DerivCoarseX: case DXIL::OpCode::DerivCoarseY: - ValidateImmOperandForMathDxilOp(CI, opcode, ValCtx); + ValidateImmOperandForMathDxilOp(CI, Opcode, ValCtx); break; // Resource validation. case DXIL::OpCode::GetDimensions: @@ -1684,7 +1684,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: - ValidateResourceDxilOp(CI, opcode, ValCtx); + ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. case DXIL::OpCode::LoadInput: @@ -1705,13 +1705,13 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::EmitStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::CutStream: - ValidateSignatureDxilOp(CI, opcode, ValCtx); + ValidateSignatureDxilOp(CI, Opcode, ValCtx); break; // Special.
case DXIL::OpCode::AllocateRayQuery: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); - if (!llvm::isa(constRayFlag)) { + llvm::Value *ConstRayFlag = CI->getOperand(1); + if (!llvm::isa(ConstRayFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQueryFlagsAreConst); } @@ -1719,9 +1719,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::AllocateRayQuery2: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); + llvm::Value *ConstRayFlag = CI->getOperand(1); llvm::Value *RayQueryFlag = CI->getOperand(2); - if (!llvm::isa(constRayFlag) || + if (!llvm::isa(ConstRayFlag) || !llvm::isa(RayQueryFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQuery2FlagsAreConst); @@ -1730,7 +1730,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, // When the ForceOMM2State ConstRayFlag is given as an argument to // a RayQuery object, AllowOpacityMicromaps is expected // as a RayQueryFlag argument - llvm::ConstantInt *Arg1 = llvm::cast(constRayFlag); + llvm::ConstantInt *Arg1 = llvm::cast(ConstRayFlag); llvm::ConstantInt *Arg2 = llvm::cast(RayQueryFlag); if ((Arg1->getValue().getSExtValue() & (unsigned)DXIL::RayFlag::ForceOMM2State) && @@ -1744,9 +1744,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::BufferUpdateCounter: { - DxilInst_BufferUpdateCounter updateCounter(CI); - Value *handle = updateCounter.get_uav(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(handle); + DxilInst_BufferUpdateCounter UpdateCounter(CI); + Value *Handle = UpdateCounter.get_uav(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); if (!RP.isUAV()) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBufferUpdateCounterOnUAV); @@ -1761,20 +1761,20 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, CI, ValidationRule::InstrBufferUpdateCounterOnResHasCounter); } - Value *inc = updateCounter.get_inc(); - if (ConstantInt *cInc = dyn_cast(inc)) { - bool isInc = cInc->getLimitedValue() == 1; + Value *Inc = UpdateCounter.get_inc(); + if (ConstantInt *cInc = dyn_cast(Inc)) { + bool IsInc = cInc->getLimitedValue() == 1; if (!ValCtx.isLibProfile) { - auto it = ValCtx.HandleResIndexMap.find(handle); - if (it != ValCtx.HandleResIndexMap.end()) { - unsigned resIndex = it->second; - if (ValCtx.UavCounterIncMap.count(resIndex)) { - if (isInc != ValCtx.UavCounterIncMap[resIndex]) { + auto It = ValCtx.HandleResIndexMap.find(Handle); + if (It != ValCtx.HandleResIndexMap.end()) { + unsigned ResIndex = It->second; + if (ValCtx.UavCounterIncMap.count(ResIndex)) { + if (IsInc != ValCtx.UavCounterIncMap[ResIndex]) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOnlyOneAllocConsume); } } else { - ValCtx.UavCounterIncMap[resIndex] = isInc; + ValCtx.UavCounterIncMap[ResIndex] = IsInc; } } @@ -1789,35 +1789,35 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } break; case DXIL::OpCode::Barrier: { - DxilInst_Barrier barrier(CI); - Value *mode = barrier.get_barrierMode(); - ConstantInt *cMode = dyn_cast(mode); - if (!cMode) { + DxilInst_Barrier Barrier(CI); + Value *Mode = Barrier.get_barrierMode(); + ConstantInt *CMode = dyn_cast(Mode); + if (!CMode) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Mode", "Barrier"}); return; } - const unsigned uglobal = + const unsigned Uglobal = static_cast(DXIL::BarrierMode::UAVFenceGlobal); - const unsigned g = static_cast(DXIL::BarrierMode::TGSMFence); 
- const unsigned ut = + const unsigned G = static_cast(DXIL::BarrierMode::TGSMFence); + const unsigned Ut = static_cast(DXIL::BarrierMode::UAVFenceThreadGroup); - unsigned barrierMode = cMode->getLimitedValue(); + unsigned BarrierMode = CMode->getLimitedValue(); - if (isCSLike || isLibFunc) { - bool bHasUGlobal = barrierMode & uglobal; - bool bHasGroup = barrierMode & g; - bool bHasUGroup = barrierMode & ut; - if (bHasUGlobal && bHasUGroup) { + if (IsCSLike || IsLibFunc) { + bool HasUGlobal = BarrierMode & Uglobal; + bool HasGroup = BarrierMode & G; + bool HasUGroup = BarrierMode & Ut; + if (HasUGlobal && HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeUselessUGroup); } - if (!bHasUGlobal && !bHasGroup && !bHasUGroup) { + if (!HasUGlobal && !HasGroup && !HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); } } else { - if (uglobal != barrierMode) { + if (Uglobal != BarrierMode) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } @@ -1831,28 +1831,28 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, "semantic", "BarrierByMemoryType"); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; case DXIL::OpCode::BarrierByNodeRecordHandle: case DXIL::OpCode::BarrierByMemoryHandle: { - std::string opName = opcode == DXIL::OpCode::BarrierByNodeRecordHandle + std::string OpName = Opcode == DXIL::OpCode::BarrierByNodeRecordHandle ? 
"barrierByNodeRecordHandle" : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", opName); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + "semantic", OpName); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; @@ -1864,7 +1864,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { - Type *pOverloadType = OP::GetOverloadType(opcode, CI->getCalledFunction()); + Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); if ((pOverloadType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, @@ -1890,73 +1890,73 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::ThreadId: // SV_DispatchThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(nodeLaunchType)}); + {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::GroupId: // SV_GroupId - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"GroupId", "SV_GroupId", GetLaunchTypeStr(nodeLaunchType)}); + {"GroupId", "SV_GroupId", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::ThreadIdInGroup: // SV_GroupThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"ThreadIdInGroup", "SV_GroupThreadID", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::FlattenedThreadIdInGroup: // SV_GroupIndex - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"FlattenedThreadIdInGroup", "SV_GroupIndex", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; default: - // TODO: make sure every opcode is checked. + // TODO: make sure every Opcode is checked. 
// Skip opcodes that don't need special checks. break; } } static bool IsDxilFunction(llvm::Function *F) { - unsigned argSize = F->arg_size(); - if (argSize < 1) { + unsigned ArgSize = F->arg_size(); + if (ArgSize < 1) { // Cannot be a DXIL operation. return false; } @@ -1991,9 +1991,9 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - OP *hlslOP = ValCtx.DxilMod.GetOP(); - bool isDxilOp = OP::IsDxilOpFunc(F); - Type *voidTy = Type::getVoidTy(F->getContext()); + OP *HlslOP = ValCtx.DxilMod.GetOP(); + bool IsDxilOp = OP::IsDxilOpFunc(F); + Type *VoidTy = Type::getVoidTy(F->getContext()); for (User *user : F->users()) { CallInst *CI = dyn_cast<CallInst>(user); @@ -2004,32 +2004,32 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } // Skip call to external user defined function - if (!isDxilOp) + if (!IsDxilOp) continue; - Value *argOpcode = CI->getArgOperand(0); - ConstantInt *constOpcode = dyn_cast<ConstantInt>(argOpcode); - if (!constOpcode) { - // opcode not immediate; function body will validate this error. + Value *ArgOpcode = CI->getArgOperand(0); + ConstantInt *ConstOpcode = dyn_cast<ConstantInt>(ArgOpcode); + if (!ConstOpcode) { + // Opcode not immediate; function body will validate this error. continue; } - unsigned opcode = constOpcode->getLimitedValue(); - if (opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { - // invalid opcode; function body will validate this error. + unsigned Opcode = ConstOpcode->getLimitedValue(); + if (Opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { + // invalid Opcode; function body will validate this error. continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; // In some cases, no overloads are provided (void is exclusive to others) - Function *dxilFunc; - if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) { - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, voidTy); + Function *DxilFunc; + if (HlslOP->IsOverloadLegal(DxilOpcode, VoidTy)) { + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, VoidTy); } else { - Type *Ty = OP::GetOverloadType(dxilOpcode, CI->getCalledFunction()); + Type *Ty = OP::GetOverloadType(DxilOpcode, CI->getCalledFunction()); try { - if (!hlslOP->IsOverloadLegal(dxilOpcode, Ty)) { + if (!HlslOP->IsOverloadLegal(DxilOpcode, Ty)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } @@ -2037,75 +2037,75 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, Ty); } - if (!dxilFunc) { - // Cannot find dxilFunction based on opcode and type. + if (!DxilFunc) { + // Cannot find DxilFunction based on Opcode and type.
ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - if (dxilFunc->getFunctionType() != F->getFunctionType()) { + if (DxilFunc->getFunctionType() != F->getFunctionType()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrCallOload, - {dxilFunc->getName()}); + {DxilFunc->getName()}); continue; } unsigned major = pSM->GetMajor(); unsigned minor = pSM->GetMinor(); if (ValCtx.isLibProfile) { - Function *callingFunction = CI->getParent()->getParent(); + Function *CallingFunction = CI->getParent()->getParent(); DXIL::ShaderKind SK = DXIL::ShaderKind::Library; - if (ValCtx.DxilMod.HasDxilFunctionProps(callingFunction)) - SK = ValCtx.DxilMod.GetDxilFunctionProps(callingFunction).shaderKind; - else if (ValCtx.DxilMod.IsPatchConstantShader(callingFunction)) + if (ValCtx.DxilMod.HasDxilFunctionProps(CallingFunction)) + SK = ValCtx.DxilMod.GetDxilFunctionProps(CallingFunction).shaderKind; + else if (ValCtx.DxilMod.IsPatchConstantShader(CallingFunction)) SK = DXIL::ShaderKind::Hull; - if (!ValidateOpcodeInProfile(dxilOpcode, SK, major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, SK, major, minor)) { // Opcode not available in profile. // produces: "lib_6_3(ps)", or "lib_6_3(anyhit)" for shader types // Or: "lib_6_3(lib)" for library function - std::string shaderModel = pSM->GetName(); - shaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; + std::string ShaderModel = pSM->GetName(); + ShaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), shaderModel}); + {HlslOP->GetOpCodeName(DxilOpcode), ShaderModel}); continue; } } else { - if (!ValidateOpcodeInProfile(dxilOpcode, pSM->GetKind(), major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, pSM->GetKind(), major, minor)) { // Opcode not available in profile. ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), pSM->GetName()}); + {HlslOP->GetOpCodeName(DxilOpcode), pSM->GetName()}); continue; } } // Check more detail. - ValidateDxilOperationCallInProfile(CI, dxilOpcode, pSM, ValCtx); + ValidateDxilOperationCallInProfile(CI, DxilOpcode, pSM, ValCtx); } } /////////////////////////////////////////////////////////////////////////////// // Instruction validation functions. // -static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { - if (ST == hlslOP->GetBinaryWithCarryType()) +static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *HlslOP) { + if (ST == HlslOP->GetBinaryWithCarryType()) return true; - if (ST == hlslOP->GetBinaryWithTwoOutputsType()) + if (ST == HlslOP->GetBinaryWithTwoOutputsType()) return true; - if (ST == hlslOP->GetFourI32Type()) + if (ST == HlslOP->GetFourI32Type()) return true; - if (ST == hlslOP->GetFourI16Type()) + if (ST == HlslOP->GetFourI16Type()) return true; - if (ST == hlslOP->GetDimensionsType()) + if (ST == HlslOP->GetDimensionsType()) return true; - if (ST == hlslOP->GetHandleType()) + if (ST == HlslOP->GetHandleType()) return true; - if (ST == hlslOP->GetSamplePosType()) + if (ST == HlslOP->GetSamplePosType()) return true; - if (ST == hlslOP->GetSplitDoubleType()) + if (ST == HlslOP->GetSplitDoubleType()) return true; unsigned EltNum = ST->getNumElements(); @@ -2114,14 +2114,14 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { case 2: // Check if it's a native vector resret. 
if (EltTy->isVectorTy()) - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); LLVM_FALLTHROUGH; case 4: case 8: // 2 for doubles, 8 for halfs. - return ST == hlslOP->GetCBufferRetType(EltTy); + return ST == HlslOP->GetCBufferRetType(EltTy); break; case 5: - return ST == hlslOP->GetResRetType(EltTy); + return ST == HlslOP->GetResRetType(EltTy); break; default: return false; @@ -2132,11 +2132,11 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { // inner type (UDT struct member) may be: [N dim array of]( UDT struct | scalar // ) scalar type may be: ( float(16|32|64) | int(16|32|64) ) static bool ValidateType(Type *Ty, ValidationContext &ValCtx, - bool bInner = false) { + bool IsInner = false) { DXASSERT_NOMSG(Ty != nullptr); if (Ty->isPointerTy()) { Type *EltTy = Ty->getPointerElementType(); - if (bInner || EltTy->isPointerTy()) { + if (IsInner || EltTy->isPointerTy()) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoPtrToPtr); return false; } @@ -2144,7 +2144,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } if (Ty->isArrayTy()) { Type *EltTy = Ty->getArrayElementType(); - if (!bInner && isa(EltTy)) { + if (!IsInner && isa(EltTy)) { // Outermost array should be converted to single-dim, // but arrays inside struct are allowed to be multi-dim ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoMultiDim); @@ -2155,7 +2155,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, Ty = EltTy; } if (Ty->isStructTy()) { - bool result = true; + bool Result = true; StructType *ST = cast(Ty); StringRef Name = ST->getName(); @@ -2163,28 +2163,28 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, // Allow handle type. if (ValCtx.HandleTy == Ty) return true; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); - if (IsDxilBuiltinStructType(ST, hlslOP)) { + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); - result = false; + Result = false; } ValCtx.EmitTypeError(Ty, ValidationRule::DeclDxilNsReserved); - result = false; + Result = false; } for (auto e : ST->elements()) { - if (!ValidateType(e, ValCtx, /*bInner*/ true)) { - result = false; + if (!ValidateType(e, ValCtx, /*IsInner*/ true)) { + Result = false; } } - return result; + return Result; } if (Ty->isFloatTy() || Ty->isHalfTy() || Ty->isDoubleTy()) { return true; } if (Ty->isIntegerTy()) { - unsigned width = Ty->getIntegerBitWidth(); - if (width != 1 && width != 8 && width != 16 && width != 32 && width != 64) { + unsigned Width = Ty->getIntegerBitWidth(); + if (Width != 1 && Width != 8 && Width != 16 && Width != 32 && Width != 64) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesIntWidth); return false; } @@ -2207,13 +2207,13 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, - unsigned index, uint64_t *pValue) { - *pValue = 0; - if (pMD->getNumOperands() < index) { + unsigned Index, uint64_t *PValue) { + *PValue = 0; + if (pMD->getNumOperands() < Index) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - ConstantAsMetadata *C = dyn_cast(pMD->getOperand(index)); + ConstantAsMetadata *C = dyn_cast(pMD->getOperand(Index)); if (C == nullptr) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; @@ -2223,7 +2223,7 @@ static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, ValCtx.EmitMetaError(pMD, 
ValidationRule::MetaWellFormed); return false; } - *pValue = CI->getValue().getZExtValue(); + *PValue = CI->getValue().getZExtValue(); return true; } @@ -2237,14 +2237,14 @@ static bool IsPrecise(Instruction &I, ValidationContext &ValCtx) { return false; } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { return false; } - if (val == 1) { + if (Val == 1) { return true; } - if (val != 0) { + if (Val != 0) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } return false; @@ -2263,12 +2263,12 @@ static bool IsValueMinPrec(DxilModule &DxilMod, Value *V) { } static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *setMeshOutputCounts, - CallInst *getMeshPayload) { + CallInst *SetMeshOutputCounts, + CallInst *GetMeshPayload) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Mesh) + if (ShaderKind != DXIL::ShaderKind::Mesh) return; } else { return; @@ -2277,10 +2277,10 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, DominatorTreeAnalysis DTA; DominatorTree DT = DTA.run(*F); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - bool foundSetMeshOutputCountsInCurrentBB = false; - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + bool FoundSetMeshOutputCountsInCurrentBb = false; + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; // Calls to external functions. CallInst *CI = dyn_cast(&I); @@ -2296,22 +2296,22 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, continue; } - if (CI == setMeshOutputCounts) { - foundSetMeshOutputCountsInCurrentBB = true; + if (CI == SetMeshOutputCounts) { + FoundSetMeshOutputCountsInCurrentBb = true; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); - unsigned opcode = OpcodeConst->getLimitedValue(); - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; - - if (dxilOpcode == DXIL::OpCode::StoreVertexOutput || - dxilOpcode == DXIL::OpCode::StorePrimitiveOutput || - dxilOpcode == DXIL::OpCode::EmitIndices) { - if (setMeshOutputCounts == nullptr) { + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); + unsigned Opcode = OpcodeConst->getLimitedValue(); + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; + + if (DxilOpcode == DXIL::OpCode::StoreVertexOutput || + DxilOpcode == DXIL::OpCode::StorePrimitiveOutput || + DxilOpcode == DXIL::OpCode::EmitIndices) { + if (SetMeshOutputCounts == nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMissingSetMeshOutputCounts); - } else if (!foundSetMeshOutputCountsInCurrentBB && - !DT.dominates(setMeshOutputCounts->getParent(), + } else if (!FoundSetMeshOutputCountsInCurrentBb && + !DT.dominates(SetMeshOutputCounts->getParent(), I.getParent())) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNonDominatingSetMeshOutputCounts); @@ -2322,61 +2322,61 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, } } - if (getMeshPayload) { - PointerType *payloadPTy = cast(getMeshPayload->getType()); - StructType *payloadTy = - cast(payloadPTy->getPointerElementType()); + if (GetMeshPayload) { + PointerType *PayloadPTy = cast(GetMeshPayload->getType()); + StructType 
*PayloadTy = + cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.MS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.MS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.MS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes)}); } - if (prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSize, - {F->getName(), std::to_string(prop.ShaderProps.MS.payloadSizeInBytes), + {F->getName(), std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } } static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *dispatchMesh) { + CallInst *DispatchMesh) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Amplification) + if (ShaderKind != DXIL::ShaderKind::Amplification) return; - if (dispatchMesh) { - DxilInst_DispatchMesh dispatchMeshCall(dispatchMesh); - Value *operandVal = dispatchMeshCall.get_payload(); - Type *payloadTy = operandVal->getType(); + if (DispatchMesh) { + DxilInst_DispatchMesh DispatchMeshCall(DispatchMesh); + Value *OperandVal = DispatchMeshCall.get_payload(); + Type *PayloadTy = OperandVal->getType(); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.AS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.AS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitInstrFormatError( - dispatchMesh, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes)}); } - if (prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, {F->getName(), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } @@ -2385,7 +2385,7 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, return; } - if (dispatchMesh == nullptr) { + if (DispatchMesh == nullptr) { ValCtx.EmitFnError(F, ValidationRule::InstrNotOnceDispatchMesh); return; } @@ -2393,30 +2393,30 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext 
&ValCtx, PostDominatorTree PDT; PDT.runOnFunction(*F); - if (!PDT.dominates(dispatchMesh->getParent(), &F->getEntryBlock())) { - ValCtx.EmitInstrError(dispatchMesh, + if (!PDT.dominates(DispatchMesh->getParent(), &F->getEntryBlock())) { + ValCtx.EmitInstrError(DispatchMesh, ValidationRule::InstrNonDominatingDispatchMesh); } - Function *dispatchMeshFunc = dispatchMesh->getCalledFunction(); - FunctionType *dispatchMeshFuncTy = dispatchMeshFunc->getFunctionType(); - PointerType *payloadPTy = - cast(dispatchMeshFuncTy->getParamType(4)); - StructType *payloadTy = cast(payloadPTy->getPointerElementType()); + Function *DispatchMeshFunc = DispatchMesh->getCalledFunction(); + FunctionType *DispatchMeshFuncTy = DispatchMeshFunc->getFunctionType(); + PointerType *PayloadPTy = + cast(DispatchMeshFuncTy->getParamType(4)); + StructType *PayloadTy = cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - if (payloadSize > DXIL::kMaxMSASPayloadBytes) { + if (PayloadSize > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, - {F->getName(), std::to_string(payloadSize), + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + {F->getName(), std::to_string(PayloadSize), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } -static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { +static void ValidateControlFlowHint(BasicBlock &BB, ValidationContext &ValCtx) { // Validate controlflow hint. - TerminatorInst *TI = bb.getTerminator(); + TerminatorInst *TI = BB.getTerminator(); if (!TI) return; @@ -2427,33 +2427,33 @@ static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { if (pNode->getNumOperands() < 3) return; - bool bHasBranch = false; - bool bHasFlatten = false; - bool bForceCase = false; + bool HasBranch = false; + bool HasFlatten = false; + bool ForceCase = false; - for (unsigned i = 2; i < pNode->getNumOperands(); i++) { - uint64_t value = 0; - if (GetNodeOperandAsInt(ValCtx, pNode, i, &value)) { - DXIL::ControlFlowHint hint = static_cast(value); - switch (hint) { + for (unsigned I = 2; I < pNode->getNumOperands(); I++) { + uint64_t Value = 0; + if (GetNodeOperandAsInt(ValCtx, pNode, I, &Value)) { + DXIL::ControlFlowHint Hint = static_cast(Value); + switch (Hint) { case DXIL::ControlFlowHint::Flatten: - bHasFlatten = true; + HasFlatten = true; break; case DXIL::ControlFlowHint::Branch: - bHasBranch = true; + HasBranch = true; break; case DXIL::ControlFlowHint::ForceCase: - bForceCase = true; + ForceCase = true; break; default: ValCtx.EmitMetaError(pNode, ValidationRule::MetaInvalidControlFlowHint); } } } - if (bHasBranch && bHasFlatten) { + if (HasBranch && HasFlatten) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaBranchFlatten); } - if (bForceCase && !isa(TI)) { + if (ForceCase && !isa(TI)) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaForceCaseOnSwitch); } } @@ -2466,30 +2466,30 @@ static void ValidateTBAAMetadata(MDNode *Node, ValidationContext &ValCtx) { } } break; case 2: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } } break; case 3: { - MDNode *rootNode = 
dyn_cast<MDNode>(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast<MDNode>(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } - ConstantAsMetadata *pointsToConstMem = + ConstantAsMetadata *PointsToConstMem = dyn_cast<ConstantAsMetadata>(Node->getOperand(2)); - if (!pointsToConstMem) { + if (!PointsToConstMem) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ConstantInt *isConst = - dyn_cast<ConstantInt>(pointsToConstMem->getValue()); - if (!isConst) { + ConstantInt *IsConst = + dyn_cast<ConstantInt>(PointsToConstMem->getValue()); + if (!IsConst) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); - } else if (isConst->getValue().getLimitedValue() > 1) { + } else if (IsConst->getValue().getLimitedValue() > 1) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } } @@ -2570,11 +2570,11 @@ static void ValidateNonUniformMetadata(Instruction &I, MDNode *pMD, if (pMD->getNumOperands() != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - if (val != 1) { + if (Val != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } } @@ -2609,31 +2609,31 @@ static void ValidateInstructionMetadata(Instruction *I, } static void ValidateFunctionAttribute(Function *F, ValidationContext &ValCtx) { - AttributeSet attrSet = F->getAttributes().getFnAttributes(); + AttributeSet AttrSet = F->getAttributes().getFnAttributes(); // fp32-denorm-mode - if (attrSet.hasAttribute(AttributeSet::FunctionIndex, + if (AttrSet.hasAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString)) { - Attribute attr = attrSet.getAttribute(AttributeSet::FunctionIndex, + Attribute Attr = AttrSet.getAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString); - StringRef value = attr.getValueAsString(); - if (!value.equals(DXIL::kFP32DenormValueAnyString) && - !value.equals(DXIL::kFP32DenormValueFtzString) && - !value.equals(DXIL::kFP32DenormValuePreserveString)) { - ValCtx.EmitFnAttributeError(F, attr.getKindAsString(), - attr.getValueAsString()); + StringRef StrValue = Attr.getValueAsString(); + if (!StrValue.equals(DXIL::kFP32DenormValueAnyString) && - !StrValue.equals(DXIL::kFP32DenormValueFtzString) && - !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + !StrValue.equals(DXIL::kFP32DenormValueFtzString) && + !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + ValCtx.EmitFnAttributeError(F, Attr.getKindAsString(), + Attr.getValueAsString()); } } // TODO: If validating libraries, we should remove all unknown function // attributes.
For each attribute, check if it is a known attribute - for (unsigned I = 0, E = attrSet.getNumSlots(); I != E; ++I) { - for (auto AttrIter = attrSet.begin(I), AttrEnd = attrSet.end(I); + for (unsigned I = 0, E = AttrSet.getNumSlots(); I != E; ++I) { + for (auto AttrIter = AttrSet.begin(I), AttrEnd = AttrSet.end(I); AttrIter != AttrEnd; ++AttrIter) { if (!AttrIter->isStringAttribute()) { continue; } - StringRef kind = AttrIter->getKindAsString(); - if (!kind.equals(DXIL::kFP32DenormKindString) && - !kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { + StringRef Kind = AttrIter->getKindAsString(); + if (!Kind.equals(DXIL::kFP32DenormKindString) && + !Kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { ValCtx.EmitFnAttributeError(F, AttrIter->getKindAsString(), AttrIter->getValueAsString()); } @@ -2683,10 +2683,10 @@ static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, ValidationContext &ValCtx) { if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) return true; - unsigned OpCode = I.getOpcode(); - if (OpCode == Instruction::InsertElement || - OpCode == Instruction::ExtractElement || - OpCode == Instruction::ShuffleVector) + unsigned Opcode = I.getOpcode(); + if (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement || + Opcode == Instruction::ShuffleVector) return false; return true; @@ -2697,16 +2697,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; bool SupportsLifetimeIntrinsics = ValCtx.DxilMod.GetShaderModel()->IsSM66Plus(); - SmallVector gradientOps; - SmallVector barriers; - CallInst *setMeshOutputCounts = nullptr; - CallInst *getMeshPayload = nullptr; - CallInst *dispatchMesh = nullptr; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + SmallVector GradientOps; + SmallVector Barriers; + CallInst *SetMeshOutputCounts = nullptr; + CallInst *GetMeshPayload = nullptr; + CallInst *DispatchMesh = nullptr; + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; if (I.hasMetadata()) { @@ -2745,27 +2745,27 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); if (OpcodeConst == nullptr) { ValCtx.EmitInstrFormatError(&I, ValidationRule::InstrOpConst, {"Opcode", "DXIL operation"}); continue; } - unsigned opcode = OpcodeConst->getLimitedValue(); - if (opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { + unsigned Opcode = OpcodeConst->getLimitedValue(); + if (Opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpCode, {std::to_string((unsigned)DXIL::OpCode::NumOpCodes), - std::to_string(opcode)}); + std::to_string(Opcode)}); continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; bool IllegalOpFunc = true; - for (auto &it : hlslOP->GetOpFuncList(dxilOpcode)) { - if (it.second == FCalled) { + for (auto &It : HlslOP->GetOpFuncList(DxilOpcode)) { + if (It.second == FCalled) { IllegalOpFunc = false; break; } @@ -2774,46 +2774,46 @@ static 
void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { if (IllegalOpFunc) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpFunction, - {FCalled->getName(), OP::GetOpCodeName(dxilOpcode)}); + {FCalled->getName(), OP::GetOpCodeName(DxilOpcode)}); continue; } - if (OP::IsDxilOpGradient(dxilOpcode)) { - gradientOps.push_back(CI); + if (OP::IsDxilOpGradient(DxilOpcode)) { + GradientOps.push_back(CI); } - if (dxilOpcode == DXIL::OpCode::Barrier) { - barriers.push_back(CI); + if (DxilOpcode == DXIL::OpCode::Barrier) { + Barriers.push_back(CI); } // External function validation will check the parameter // list. This function will check that the call does not // violate any rules. - if (dxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { + if (DxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { // validate the call count of SetMeshOutputCounts - if (setMeshOutputCounts != nullptr) { + if (SetMeshOutputCounts != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleSetMeshOutputCounts); } - setMeshOutputCounts = CI; + SetMeshOutputCounts = CI; } - if (dxilOpcode == DXIL::OpCode::GetMeshPayload) { + if (DxilOpcode == DXIL::OpCode::GetMeshPayload) { // validate the call count of GetMeshPayload - if (getMeshPayload != nullptr) { + if (GetMeshPayload != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleGetMeshPayload); } - getMeshPayload = CI; + GetMeshPayload = CI; } - if (dxilOpcode == DXIL::OpCode::DispatchMesh) { + if (DxilOpcode == DXIL::OpCode::DispatchMesh) { // validate the call count of DispatchMesh - if (dispatchMesh != nullptr) { + if (DispatchMesh != nullptr) { ValCtx.EmitInstrError(&I, ValidationRule::InstrNotOnceDispatchMesh); } - dispatchMesh = CI; + DispatchMesh = CI; } } continue; @@ -2821,23 +2821,23 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { for (Value *op : I.operands()) { if (isa(op)) { - bool legalUndef = isa(&I); + bool LegalUndef = isa(&I); if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } if (isa(&I)) { - legalUndef = op == I.getOperand(1); + LegalUndef = op == I.getOperand(1); } if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } - if (!legalUndef) + if (!LegalUndef) ValCtx.EmitInstrError(&I, ValidationRule::InstrNoReadingUninitialized); } else if (ConstantExpr *CE = dyn_cast(op)) { - for (Value *opCE : CE->operands()) { - if (isa(opCE)) { + for (Value *OpCE : CE->operands()) { + if (isa(OpCE)) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNoReadingUninitialized); } @@ -2867,8 +2867,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } - unsigned opcode = I.getOpcode(); - switch (opcode) { + unsigned Opcode = I.getOpcode(); + switch (Opcode) { case Instruction::Alloca: { AllocaInst *AI = cast(&I); // TODO: validate address space and alignment @@ -2909,26 +2909,26 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } GetElementPtrInst *GEP = cast(&I); - bool allImmIndex = true; + bool AllImmIndex = true; for (auto Idx = GEP->idx_begin(), E = GEP->idx_end(); Idx != E; Idx++) { if (!isa(Idx)) { - allImmIndex = false; + AllImmIndex = false; break; } } - if (allImmIndex) { + if (AllImmIndex) { const DataLayout &DL = ValCtx.DL; Value *Ptr = GEP->getPointerOperand(); - unsigned size = + unsigned Size = DL.getTypeAllocSize(Ptr->getType()->getPointerElementType()); - unsigned valSize = + unsigned ValSize = 
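
The SetMeshOutputCounts, GetMeshPayload and DispatchMesh branches above all apply the same "call at most once per entry" rule. A tiny sketch of that bookkeeping, with CallSite as a hypothetical stand-in for the CallInst pointer the validator remembers:

#include <string>
#include <vector>

struct CallSite { std::string Callee; };

// Remembers the first occurrence of a once-only DXIL op and records a
// diagnostic for every later occurrence, mirroring the duplicate checks above.
struct OncePerEntryTracker {
  const CallSite *First = nullptr;
  std::vector<std::string> Errors;

  void Note(const CallSite &CS, const char *RuleName) {
    if (First)
      Errors.push_back(RuleName); // second call of a once-only op is an error
    else
      First = &CS;                // the first (and only legal) call
  }
};
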
DL.getTypeAllocSize(GEP->getType()->getPointerElementType()); SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - unsigned offset = + unsigned Offset = DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); - if ((offset + valSize) > size) { + if ((Offset + ValSize) > Size) { ValCtx.EmitInstrError(GEP, ValidationRule::InstrInBoundsAccess); } } @@ -3002,16 +3002,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: { Value *Ptr = I.getOperand(AtomicRMWInst::getPointerOperandIndex()); - PointerType *ptrType = cast(Ptr->getType()); - Type *elType = ptrType->getElementType(); + PointerType *PtrType = cast(Ptr->getType()); + Type *ElType = PtrType->getElementType(); const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - if ((elType->isIntegerTy(64)) && !pSM->IsSM66Plus()) + if ((ElType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( &I, ValidationRule::SmOpcodeInInvalidFunction, {"64-bit atomic operations", "Shader Model 6.6+"}); - if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace && - ptrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) + if (PtrType->getAddressSpace() != DXIL::kTGSMAddrSpace && + PtrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) ValCtx.EmitInstrError( &I, ValidationRule::InstrAtomicOpNonGroupsharedOrRecord); @@ -3062,12 +3062,12 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } } - ValidateControlFlowHint(*b, ValCtx); + ValidateControlFlowHint(*B, ValCtx); } - ValidateMsIntrinsics(F, ValCtx, setMeshOutputCounts, getMeshPayload); + ValidateMsIntrinsics(F, ValCtx, SetMeshOutputCounts, GetMeshPayload); - ValidateAsIntrinsics(F, ValCtx, dispatchMesh); + ValidateAsIntrinsics(F, ValCtx, DispatchMesh); } static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { @@ -3075,39 +3075,39 @@ static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { // to do here if (!ValCtx.DxilMod.HasDxilFunctionProps(F)) return; - auto &props = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (!props.IsNode()) + auto &Props = ValCtx.DxilMod.GetDxilFunctionProps(F); + if (!Props.IsNode()) return; - if (props.InputNodes.size() > 1) { + if (Props.InputNodes.size() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::DeclMultipleNodeInputs, - {F->getName(), std::to_string(props.InputNodes.size())}); + {F->getName(), std::to_string(Props.InputNodes.size())}); } - for (auto &input : props.InputNodes) { - if (!input.Flags.RecordTypeMatchesLaunchType(props.Node.LaunchType)) { + for (auto &input : Props.InputNodes) { + if (!input.Flags.RecordTypeMatchesLaunchType(Props.Node.LaunchType)) { // We allow EmptyNodeInput here, as that may have been added implicitly // if there was no input specified if (input.Flags.IsEmptyInput()) continue; - llvm::StringRef validInputs = ""; - switch (props.Node.LaunchType) { + llvm::StringRef ValidInputs = ""; + switch (Props.Node.LaunchType) { case DXIL::NodeLaunchType::Broadcasting: - validInputs = "{RW}DispatchNodeInputRecord"; + ValidInputs = "{RW}DispatchNodeInputRecord"; break; case DXIL::NodeLaunchType::Coalescing: - validInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; + ValidInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; break; case DXIL::NodeLaunchType::Thread: - validInputs = "{RW}ThreadNodeInputRecord"; + ValidInputs = "{RW}ThreadNodeInputRecord"; break; default: llvm_unreachable("invalid launch type"); } ValCtx.EmitFnFormatError( F, 
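
The constant-index GEP check above reduces to simple byte arithmetic: the indexed offset plus the size of the accessed element must stay inside the pointed-to allocation. A minimal sketch of that rule, written to avoid overflow rather than as the literal `Offset + ValSize > Size` comparison; all sizes are assumed to come from the module's DataLayout:

#include <cstdint>

// True when an access of AccessSizeBytes at ByteOffset stays inside an
// allocation of AllocSizeBytes, matching the InstrInBoundsAccess rule above.
bool IsConstantGEPInBounds(uint64_t AllocSizeBytes, uint64_t AccessSizeBytes,
                           uint64_t ByteOffset) {
  return ByteOffset <= AllocSizeBytes &&
         AccessSizeBytes <= AllocSizeBytes - ByteOffset;
}
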
ValidationRule::DeclNodeLaunchInputType, - {ShaderModel::GetNodeLaunchTypeName(props.Node.LaunchType), - F->getName(), validInputs}); + {ShaderModel::GetNodeLaunchTypeName(Props.Node.LaunchType), + F->getName(), ValidInputs}); } } } @@ -3118,26 +3118,26 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (F.isIntrinsic() || IsDxilFunction(&F)) return; } else { - DXIL::ShaderKind shaderKind = DXIL::ShaderKind::Library; - bool isShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); - unsigned numUDTShaderArgs = 0; - if (isShader) { - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; - switch (shaderKind) { + DXIL::ShaderKind ShaderKind = DXIL::ShaderKind::Library; + bool IsShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); + unsigned NumUDTShaderArgs = 0; + if (IsShader) { + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; + switch (ShaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - numUDTShaderArgs = 2; + NumUDTShaderArgs = 2; break; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - numUDTShaderArgs = 1; + NumUDTShaderArgs = 1; break; case DXIL::ShaderKind::Compute: { DxilModule &DM = ValCtx.DxilMod; if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); // Check that compute has no node metadata - if (entryProps.props.IsNode()) { + if (EntryProps.props.IsNode()) { ValCtx.EmitFnFormatError(&F, ValidationRule::MetaComputeWithNode, {F.getName()}); } @@ -3148,45 +3148,45 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { break; } } else { - isShader = ValCtx.DxilMod.IsPatchConstantShader(&F); + IsShader = ValCtx.DxilMod.IsPatchConstantShader(&F); } // Entry function should not have parameter. - if (isShader && 0 == numUDTShaderArgs && !F.arg_empty()) + if (IsShader && 0 == NumUDTShaderArgs && !F.arg_empty()) ValCtx.EmitFnFormatError(&F, ValidationRule::FlowFunctionCall, {F.getName()}); // Shader functions should return void. 
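
The launch-type switch that feeds the DeclNodeLaunchInputType diagnostic above maps each node launch type to the family of input record types it accepts. A hypothetical mirror of that table as a standalone helper:

#include <string>

enum class NodeLaunchType { Broadcasting, Coalescing, Thread };

// Returns the human-readable set of legal input record types for a launch
// type, as reported in the diagnostic above.
std::string ValidInputsFor(NodeLaunchType LT) {
  switch (LT) {
  case NodeLaunchType::Broadcasting:
    return "{RW}DispatchNodeInputRecord";
  case NodeLaunchType::Coalescing:
    return "{RW}GroupNodeInputRecords or EmptyNodeInput";
  case NodeLaunchType::Thread:
    return "{RW}ThreadNodeInputRecord";
  }
  return "";
}
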
- if (isShader && !F.getReturnType()->isVoidTy()) + if (IsShader && !F.getReturnType()->isVoidTy()) ValCtx.EmitFnFormatError(&F, ValidationRule::DeclShaderReturnVoid, {F.getName()}); - auto ArgFormatError = [&](Function &F, Argument &arg, ValidationRule rule) { - if (arg.hasName()) - ValCtx.EmitFnFormatError(&F, rule, {arg.getName().str(), F.getName()}); + auto ArgFormatError = [&](Function &F, Argument &Arg, ValidationRule Rule) { + if (Arg.hasName()) + ValCtx.EmitFnFormatError(&F, Rule, {Arg.getName().str(), F.getName()}); else - ValCtx.EmitFnFormatError(&F, rule, - {std::to_string(arg.getArgNo()), F.getName()}); + ValCtx.EmitFnFormatError(&F, Rule, + {std::to_string(Arg.getArgNo()), F.getName()}); }; - unsigned numArgs = 0; - for (auto &arg : F.args()) { - Type *argTy = arg.getType(); - if (argTy->isPointerTy()) - argTy = argTy->getPointerElementType(); - - numArgs++; - if (numUDTShaderArgs) { - if (arg.getArgNo() >= numUDTShaderArgs) { - ArgFormatError(F, arg, ValidationRule::DeclExtraArgs); - } else if (!argTy->isStructTy()) { - switch (shaderKind) { + unsigned NumArgs = 0; + for (auto &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + if (ArgTy->isPointerTy()) + ArgTy = ArgTy->getPointerElementType(); + + NumArgs++; + if (NumUDTShaderArgs) { + if (Arg.getArgNo() >= NumUDTShaderArgs) { + ArgFormatError(F, Arg, ValidationRule::DeclExtraArgs); + } else if (!ArgTy->isStructTy()) { + switch (ShaderKind) { case DXIL::ShaderKind::Callable: - ArgFormatError(F, arg, ValidationRule::DeclParamStruct); + ArgFormatError(F, Arg, ValidationRule::DeclParamStruct); break; default: - ArgFormatError(F, arg, - arg.getArgNo() == 0 + ArgFormatError(F, Arg, + Arg.getArgNo() == 0 ? ValidationRule::DeclPayloadStruct : ValidationRule::DeclAttrStruct); } @@ -3194,24 +3194,24 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { continue; } - while (argTy->isArrayTy()) { - argTy = argTy->getArrayElementType(); + while (ArgTy->isArrayTy()) { + ArgTy = ArgTy->getArrayElementType(); } - if (argTy->isStructTy() && !ValCtx.isLibProfile) { - ArgFormatError(F, arg, ValidationRule::DeclFnFlattenParam); + if (ArgTy->isStructTy() && !ValCtx.isLibProfile) { + ArgFormatError(F, Arg, ValidationRule::DeclFnFlattenParam); break; } } - if (numArgs < numUDTShaderArgs && shaderKind != DXIL::ShaderKind::Node) { - StringRef argType[2] = { - shaderKind == DXIL::ShaderKind::Callable ? "params" : "payload", + if (NumArgs < NumUDTShaderArgs && ShaderKind != DXIL::ShaderKind::Node) { + StringRef ArgType[2] = { + ShaderKind == DXIL::ShaderKind::Callable ? 
"params" : "payload", "attributes"}; - for (unsigned i = numArgs; i < numUDTShaderArgs; i++) { + for (unsigned I = NumArgs; I < NumUDTShaderArgs; I++) { ValCtx.EmitFnFormatError( &F, ValidationRule::DeclShaderMissingArg, - {ShaderModel::GetKindName(shaderKind), F.getName(), argType[i]}); + {ShaderModel::GetKindName(ShaderKind), F.getName(), ArgType[I]}); } } @@ -3248,25 +3248,25 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { static void ValidateGlobalVariable(GlobalVariable &GV, ValidationContext &ValCtx) { - bool isInternalGV = + bool IsInternalGv = dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV); if (ValCtx.isLibProfile) { - auto isCBufferGlobal = + auto IsCBufferGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isResourceGlobal = + auto IsResourceGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isSamplerGlobal = + auto IsSamplerGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) @@ -3274,32 +3274,32 @@ static void ValidateGlobalVariable(GlobalVariable &GV, return false; }; - bool isRes = isCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetUAVs()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetSRVs()); - isRes |= isSamplerGlobal(ValCtx.DxilMod.GetSamplers()); - isInternalGV |= isRes; + bool IsRes = IsCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetUAVs()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetSRVs()); + IsRes |= IsSamplerGlobal(ValCtx.DxilMod.GetSamplers()); + IsInternalGv |= IsRes; // Allow special dx.ishelper for library target if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0) { Type *Ty = GV.getType()->getPointerElementType(); if (Ty->isIntegerTy() && Ty->getScalarSizeInBits() == 32) { - isInternalGV = true; + IsInternalGv = true; } } } - if (!isInternalGV) { + if (!IsInternalGv) { if (!GV.user_empty()) { - bool hasInstructionUser = false; + bool HasInstructionUser = false; for (User *U : GV.users()) { if (isa(U)) { - hasInstructionUser = true; + HasInstructionUser = true; break; } } // External GV should not have instruction user. 
- if (hasInstructionUser) { + if (HasInstructionUser) { ValCtx.EmitGlobalVariableFormatError( &GV, ValidationRule::DeclNotUsedExternal, {GV.getName()}); } @@ -3322,14 +3322,14 @@ static void ValidateGlobalVariable(GlobalVariable &GV, } static void CollectFixAddressAccess(Value *V, - std::vector &fixAddrTGSMList) { + std::vector &FixAddrTGSMList) { for (User *U : V->users()) { if (GEPOperator *GEP = dyn_cast(U)) { if (isa(GEP) || GEP->hasAllConstantIndices()) { - CollectFixAddressAccess(GEP, fixAddrTGSMList); + CollectFixAddressAccess(GEP, FixAddrTGSMList); } } else if (StoreInst *SI = dyn_cast(U)) { - fixAddrTGSMList.emplace_back(SI); + FixAddrTGSMList.emplace_back(SI); } } } @@ -3339,16 +3339,16 @@ static bool IsDivergent(Value *V) { return false; } -static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, +static void ValidateTGSMRaceCondition(std::vector &FixAddrTGSMList, ValidationContext &ValCtx) { - std::unordered_set fixAddrTGSMFuncSet; - for (StoreInst *I : fixAddrTGSMList) { + std::unordered_set FixAddrTGSMFuncSet; + for (StoreInst *I : FixAddrTGSMList) { BasicBlock *BB = I->getParent(); - fixAddrTGSMFuncSet.insert(BB->getParent()); + FixAddrTGSMFuncSet.insert(BB->getParent()); } for (auto &F : ValCtx.DxilMod.GetModule()->functions()) { - if (F.isDeclaration() || !fixAddrTGSMFuncSet.count(&F)) + if (F.isDeclaration() || !FixAddrTGSMFuncSet.count(&F)) continue; PostDominatorTree PDT; @@ -3356,7 +3356,7 @@ static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, BasicBlock *Entry = &F.getEntryBlock(); - for (StoreInst *SI : fixAddrTGSMList) { + for (StoreInst *SI : FixAddrTGSMList) { BasicBlock *BB = SI->getParent(); if (BB->getParent() == &F) { if (PDT.dominates(BB, Entry)) { @@ -3375,7 +3375,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib(); unsigned TGSMSize = 0; - std::vector fixAddrTGSMList; + std::vector FixAddrTGSMList; const DataLayout &DL = M.GetModule()->getDataLayout(); for (GlobalVariable &GV : M.GetModule()->globals()) { ValidateGlobalVariable(GV, ValCtx); @@ -3390,9 +3390,9 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { if (Instruction *I = dyn_cast(U)) { llvm::Function *F = I->getParent()->getParent(); if (M.HasDxilEntryProps(F)) { - DxilFunctionProps &props = M.GetDxilEntryProps(F).props; - if (!props.IsCS() && !props.IsAS() && !props.IsMS() && - !props.IsNode()) { + DxilFunctionProps &Props = M.GetDxilEntryProps(F).props; + if (!Props.IsCS() && !Props.IsAS() && !Props.IsMS() && + !Props.IsNode()) { ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported, {"from non-compute entry points"}); @@ -3402,7 +3402,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { } } TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType()); - CollectFixAddressAccess(&GV, fixAddrTGSMList); + CollectFixAddressAccess(&GV, FixAddrTGSMList); } } @@ -3426,8 +3426,8 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { GV, Rule, {std::to_string(TGSMSize), std::to_string(MaxSize)}); } - if (!fixAddrTGSMList.empty()) { - ValidateTGSMRaceCondition(fixAddrTGSMList, ValCtx); + if (!FixAddrTGSMList.empty()) { + ValidateTGSMRaceCondition(FixAddrTGSMList, ValCtx); } } @@ -3440,20 +3440,20 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - 
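
The TGSM race check above collects stores through all-constant-index GEPs into groupshared memory and flags those whose basic block post-dominates the function entry, since every thread would then write the same fixed address. A conceptual sketch of that final filtering step, with BlockId and PostDominatesEntry as hypothetical stand-ins for the basic block and the PostDominatorTree query:

#include <vector>

struct FixedAddressStore { int BlockId; };

// Returns the blocks whose fixed-address groupshared stores are unconditionally
// executed by every thread, i.e. the candidates for the race-condition rule.
std::vector<int>
FindRacyStores(const std::vector<FixedAddressStore> &Stores,
               bool (*PostDominatesEntry)(int BlockId)) {
  std::vector<int> Racy;
  for (const FixedAddressStore &S : Stores)
    if (PostDominatesEntry(S.BlockId))
      Racy.push_back(S.BlockId);
  return Racy;
}
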
uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { - unsigned curMajor, curMinor; - GetValidationVersion(&curMajor, &curMinor); + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { + unsigned CurMajor, CurMinor; + GetValidationVersion(&CurMajor, &CurMinor); // This will need to be updated as major/minor versions evolve, // depending on the degree of compat across versions. - if (majorVer == curMajor && minorVer <= curMinor) { + if (MajorVer == CurMajor && MinorVer <= CurMinor) { return; } else { ValCtx.EmitFormatError( ValidationRule::MetaVersionSupported, - {"Validator", std::to_string(majorVer), std::to_string(minorVer), - std::to_string(curMajor), std::to_string(curMinor)}); + {"Validator", std::to_string(MajorVer), std::to_string(MinorVer), + std::to_string(CurMajor), std::to_string(CurMinor)}); return; } } @@ -3471,19 +3471,19 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { // This will need to be updated as dxil major/minor versions evolve, // depending on the degree of compat across versions. - if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) && - (majorVer == ValCtx.m_DxilMajor && - minorVer == ValCtx.m_DxilMinor)) { + if ((MajorVer == DXIL::kDxilMajor && MinorVer <= DXIL::kDxilMinor) && + (MajorVer == ValCtx.m_DxilMajor && + MinorVer == ValCtx.m_DxilMinor)) { return; } else { ValCtx.EmitFormatError(ValidationRule::MetaVersionSupported, - {"Dxil", std::to_string(majorVer), - std::to_string(minorVer), + {"Dxil", std::to_string(MajorVer), + std::to_string(MinorVer), std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)}); return; @@ -3501,16 +3501,16 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { NamedMDNode *TA = pModule->getNamedMetadata("dx.typeAnnotations"); if (TA == nullptr) return; - for (unsigned i = 0, end = TA->getNumOperands(); i < end; ++i) { - MDTuple *TANode = dyn_cast(TA->getOperand(i)); + for (unsigned I = 0, End = TA->getNumOperands(); I < End; ++I) { + MDTuple *TANode = dyn_cast(TA->getOperand(I)); if (TANode->getNumOperands() < 3) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } - ConstantInt *tag = mdconst::extract(TANode->getOperand(0)); - uint64_t tagValue = tag->getZExtValue(); - if (tagValue != DxilMDHelper::kDxilTypeSystemStructTag && - tagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { + ConstantInt *Tag = mdconst::extract(TANode->getOperand(0)); + uint64_t TagValue = Tag->getZExtValue(); + if (TagValue != DxilMDHelper::kDxilTypeSystemStructTag && + TagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } @@ -3519,11 +3519,11 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { } static void ValidateBitcode(ValidationContext &ValCtx) { - std::string diagStr; - raw_string_ostream diagStream(diagStr); - if 
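
The validator-version compatibility rule above is simply "same major, minor no newer than the running validator". A one-line sketch of that predicate (the DXIL-version hunk that follows adds the extra requirement that the recorded version also matches the module's own m_DxilMajor/m_DxilMinor):

// Accepts the dx.valver metadata when the recorded version is compatible with
// the running validator, as in ValidateValidatorVersion above.
bool IsValidatorVersionSupported(unsigned MajorVer, unsigned MinorVer,
                                 unsigned CurMajor, unsigned CurMinor) {
  return MajorVer == CurMajor && MinorVer <= CurMinor;
}
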
(llvm::verifyModule(ValCtx.M, &diagStream)) { + std::string DiagStr; + raw_string_ostream DiagStream(DiagStr); + if (llvm::verifyModule(ValCtx.M, &DiagStream)) { ValCtx.EmitError(ValidationRule::BitcodeValid); - dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), diagStream.str()); + dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), DiagStream.str()); } } @@ -3537,18 +3537,18 @@ static void ValidateWaveSize(ValidationContext &ValCtx, if (!EPs) return; - for (unsigned i = 0, end = EPs->getNumOperands(); i < end; ++i) { - MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(i)); + for (unsigned I = 0, End = EPs->getNumOperands(); I < End; ++I) { + MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(I)); if (EPNodeRef->getNumOperands() < 5) { ValCtx.EmitMetaError(EPNodeRef, ValidationRule::MetaWellFormed); return; } // get access to the digit that represents the metadata number that // would store entry properties - const llvm::MDOperand &mOp = + const llvm::MDOperand &MOp = EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1); // the final operand to the entry points tuple should be a tuple. - if (mOp == nullptr || (mOp.get())->getMetadataID() != Metadata::MDTupleKind) + if (MOp == nullptr || (MOp.get())->getMetadataID() != Metadata::MDTupleKind) continue; // get access to the node that stores entry properties @@ -3556,29 +3556,29 @@ static void ValidateWaveSize(ValidationContext &ValCtx, EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1)); // find any incompatible tags inside the entry properties // increment j by 2 to only analyze tags, not values - bool foundTag = false; - for (unsigned j = 0, end2 = EPropNode->getNumOperands(); j < end2; j += 2) { - const MDOperand &propertyTagOp = EPropNode->getOperand(j); + bool FoundTag = false; + for (unsigned J = 0, End2 = EPropNode->getNumOperands(); J < End2; J += 2) { + const MDOperand &PropertyTagOp = EPropNode->getOperand(J); // note, we are only looking for tags, which will be a constant // integer - DXASSERT(!(propertyTagOp == nullptr || - (propertyTagOp.get())->getMetadataID() != + DXASSERT(!(PropertyTagOp == nullptr || + (PropertyTagOp.get())->getMetadataID() != Metadata::ConstantAsMetadataKind), "tag operand should be a constant integer."); - ConstantInt *tag = mdconst::extract(propertyTagOp); - uint64_t tagValue = tag->getZExtValue(); + ConstantInt *Tag = mdconst::extract(PropertyTagOp); + uint64_t TagValue = Tag->getZExtValue(); // legacy wavesize is only supported between 6.6 and 6.7, so we // should fail if we find the ranged wave size metadata tag - if (tagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { + if (TagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; + FoundTag = true; if (SM->IsSM66Plus() && !SM->IsSM68Plus()) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeRangeNeedsSM68Plus, @@ -3587,36 +3587,36 @@ static void ValidateWaveSize(ValidationContext &ValCtx, } // get the metadata that contains the // parameters to the wavesize attribute - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 3) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeRangeExpectsThreeParams, {}); return; } - for (int k = 0; k < 3; k++) { - const MDOperand ¶m = WaveTuple->getOperand(k); - if (param->getMetadataID() != 
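
The wave-size hunks in this region enforce a small set of shape rules: at most one wave-size tag per entry point, one constant operand for the legacy form, three for the ranged form, and the ranged form only on profiles that support it. A simplified sketch of those rules; the RangedFormAllowed flag stands in for the SM 6.8+ query, and the real check also verifies that every operand is a constant:

#include <cstddef>

// Returns an error message, or nullptr if the tag is acceptable.
const char *CheckWaveSizeTag(bool AlreadySeenTag, bool IsRangedForm,
                             size_t NumOperands, bool RangedFormAllowed) {
  if (AlreadySeenTag)
    return "duplicate wave size tag on entry point";
  if (IsRangedForm) {
    if (!RangedFormAllowed)  // ranged wave size needs shader model 6.8+
      return "ranged wave size requires shader model 6.8+";
    if (NumOperands != 3)    // assumed to be (min, max, preferred)
      return "ranged wave size expects three parameters";
  } else if (NumOperands != 1) {
    return "wave size expects one parameter";
  }
  return nullptr;
}
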
Metadata::ConstantAsMetadataKind) { + for (int K = 0; K < 3; K++) { + const MDOperand &Param = WaveTuple->getOperand(K); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; } } - } else if (tagValue == DxilMDHelper::kDxilWaveSizeTag) { + } else if (TagValue == DxilMDHelper::kDxilWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + FoundTag = true; + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 1) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeExpectsOneParam, {}); return; } - const MDOperand ¶m = WaveTuple->getOperand(0); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + const MDOperand &Param = WaveTuple->getOperand(0); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; @@ -3637,9 +3637,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { ValidateDxilVersion(ValCtx); Module *pModule = &ValCtx.M; - const std::string &target = pModule->getTargetTriple(); - if (target != "dxil-ms-dx") { - ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target}); + const std::string &Target = pModule->getTargetTriple(); + if (Target != "dxil-ms-dx") { + ValCtx.EmitFormatError(ValidationRule::MetaTarget, {Target}); } // The llvm.dbg.(cu/contents/defines/mainFileName/arg) named metadata nodes @@ -3647,9 +3647,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { // llvm.bitsets is also disallowed. // // These are verified in lib/IR/Verifier.cpp. - StringMap llvmNamedMeta; - llvmNamedMeta["llvm.ident"]; - llvmNamedMeta["llvm.module.flags"]; + StringMap LlvmNamedMeta; + LlvmNamedMeta["llvm.ident"]; + LlvmNamedMeta["llvm.module.flags"]; for (auto &NamedMetaNode : pModule->named_metadata()) { if (!DxilModule::IsKnownNamedMetaData(NamedMetaNode)) { @@ -3657,7 +3657,7 @@ static void ValidateMetadata(ValidationContext &ValCtx) { if (!name.startswith_lower("llvm.")) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } else { - if (llvmNamedMeta.count(name) == 0) { + if (LlvmNamedMeta.count(name) == 0) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } } @@ -3690,35 +3690,35 @@ static void ValidateMetadata(ValidationContext &ValCtx) { } static void ValidateResourceOverlap( - hlsl::DxilResourceBase &res, - SpacesAllocator &spaceAllocator, + hlsl::DxilResourceBase &Res, + SpacesAllocator &SpaceAllocator, ValidationContext &ValCtx) { - unsigned base = res.GetLowerBound(); - if (ValCtx.isLibProfile && !res.IsAllocated()) { + unsigned Base = Res.GetLowerBound(); + if (ValCtx.isLibProfile && !Res.IsAllocated()) { // Skip unallocated resource for library. 
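
The module-metadata hunk above enforces two simple policies: the target triple must be the DXIL triple, and the only tolerated llvm.* named metadata nodes are llvm.ident and llvm.module.flags (non-llvm.* names must be metadata the DXIL module itself recognizes). A minimal sketch of both checks:

#include <set>
#include <string>

bool IsAllowedLlvmNamedMetadata(const std::string &Name) {
  static const std::set<std::string> Allowed = {"llvm.ident",
                                                "llvm.module.flags"};
  return Allowed.count(Name) != 0; // anything else llvm.* is MetaKnown error
}

bool IsDxilTargetTriple(const std::string &Triple) {
  return Triple == "dxil-ms-dx";   // MetaTarget rule above
}
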
return; } - unsigned size = res.GetRangeSize(); - unsigned space = res.GetSpaceID(); + unsigned Size = Res.GetRangeSize(); + unsigned Space = Res.GetSpaceID(); - auto &allocator = spaceAllocator.Get(space); - unsigned end = base + size - 1; + auto &Allocator = SpaceAllocator.Get(Space); + unsigned End = Base + Size - 1; // unbounded - if (end < base) - end = size; - const DxilResourceBase *conflictRes = allocator.Insert(&res, base, end); - if (conflictRes) { + if (End < Base) + End = Size; + const DxilResourceBase *ConflictRes = Allocator.Insert(&Res, Base, End); + if (ConflictRes) { ValCtx.EmitFormatError( ValidationRule::SmResourceRangeOverlap, - {ValCtx.GetResourceName(&res), std::to_string(base), - std::to_string(size), std::to_string(conflictRes->GetLowerBound()), - std::to_string(conflictRes->GetRangeSize()), std::to_string(space)}); + {ValCtx.GetResourceName(&Res), std::to_string(Base), + std::to_string(Size), std::to_string(ConflictRes->GetLowerBound()), + std::to_string(ConflictRes->GetRangeSize()), std::to_string(Space)}); } } -static void ValidateResource(hlsl::DxilResource &res, +static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { - switch (res.GetKind()) { + switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: @@ -3730,8 +3730,8 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ResourceKind::Texture3D: case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - if (res.GetSampleCount() > 0) { - ValCtx.EmitResourceError(&res, ValidationRule::SmSampleCountOnlyOn2DMS); + if (Res.GetSampleCount() > 0) { + ValCtx.EmitResourceError(&Res, ValidationRule::SmSampleCountOnlyOn2DMS); } break; case DXIL::ResourceKind::Texture2DMS: @@ -3742,16 +3742,16 @@ static void ValidateResource(hlsl::DxilResource &res, break; case DXIL::ResourceKind::FeedbackTexture2D: case DXIL::ResourceKind::FeedbackTexture2DArray: - if (res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) - ValCtx.EmitResourceError(&res, + if (Res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidSamplerFeedbackType); break; default: - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceKind); + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceKind); break; } - switch (res.GetCompType().GetKind()) { + switch (Res.GetCompType().GetKind()) { case DXIL::ComponentType::F32: case DXIL::ComponentType::SNormF32: case DXIL::ComponentType::UNormF32: @@ -3765,266 +3765,266 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ComponentType::U16: break; default: - if (!res.IsStructuredBuffer() && !res.IsRawBuffer() && - !res.IsFeedbackTexture()) - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceCompType); + if (!Res.IsStructuredBuffer() && !Res.IsRawBuffer() && + !Res.IsFeedbackTexture()) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceCompType); break; } - if (res.IsStructuredBuffer()) { - unsigned stride = res.GetElementStride(); - bool alignedTo4Bytes = (stride & 3) == 0; - if (!alignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { + if (Res.IsStructuredBuffer()) { + unsigned Stride = Res.GetElementStride(); + bool AlignedTo4Bytes = (Stride & 3) == 0; + if (!AlignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignment, - 
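
ValidateResourceOverlap above places each resource's register range [LowerBound, LowerBound + RangeSize - 1] into a per-space allocator and reports the first conflict. A simplified model of that bookkeeping using an ordered map keyed by range start; unbounded ranges (the End < Base case above) are left out of this sketch:

#include <iterator>
#include <map>
#include <string>

struct Range { unsigned Start, End; std::string Name; };

class SpaceRanges {
  std::map<unsigned, Range> ByStart; // existing, non-overlapping ranges
public:
  // Returns the conflicting resource name, or an empty string if placed.
  std::string Insert(const Range &R) {
    auto Next = ByStart.lower_bound(R.Start);
    if (Next != ByStart.end() && Next->second.Start <= R.End)
      return Next->second.Name;       // overlaps the following range
    if (Next != ByStart.begin()) {
      auto Prev = std::prev(Next);
      if (Prev->second.End >= R.Start)
        return Prev->second.Name;     // overlaps the preceding range
    }
    ByStart.emplace(R.Start, R);
    return {};
  }
};
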
{std::to_string(4), std::to_string(stride)}); + &Res, ValidationRule::MetaStructBufAlignment, + {std::to_string(4), std::to_string(Stride)}); } - if (stride > DXIL::kMaxStructBufferStride) { + if (Stride > DXIL::kMaxStructBufferStride) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignmentOutOfBound, + &Res, ValidationRule::MetaStructBufAlignmentOutOfBound, {std::to_string(DXIL::kMaxStructBufferStride), - std::to_string(stride)}); + std::to_string(Stride)}); } } - if (res.IsAnyTexture() || res.IsTypedBuffer()) { - Type *RetTy = res.GetRetType(); - unsigned size = + if (Res.IsAnyTexture() || Res.IsTypedBuffer()) { + Type *RetTy = Res.GetRetType(); + unsigned Size = ValCtx.DxilMod.GetModule()->getDataLayout().getTypeAllocSize(RetTy); - if (size > 4 * 4) { - ValCtx.EmitResourceError(&res, ValidationRule::MetaTextureType); + if (Size > 4 * 4) { + ValCtx.EmitResourceError(&Res, ValidationRule::MetaTextureType); } } } static void CollectCBufferRanges( - DxilStructAnnotation *annotation, - SpanAllocator &constAllocator, unsigned base, - DxilTypeSystem &typeSys, StringRef cbName, ValidationContext &ValCtx) { - DXASSERT(((base + 15) & ~(0xf)) == base, + DxilStructAnnotation *Annotation, + SpanAllocator &ConstAllocator, unsigned Base, + DxilTypeSystem &TypeSys, StringRef CbName, ValidationContext &ValCtx) { + DXASSERT(((Base + 15) & ~(0xf)) == Base, "otherwise, base for struct is not aligned"); - unsigned cbSize = annotation->GetCBufferSize(); + unsigned CbSize = Annotation->GetCBufferSize(); - const StructType *ST = annotation->GetStructType(); + const StructType *ST = Annotation->GetStructType(); - for (int i = annotation->GetNumFields() - 1; i >= 0; i--) { - DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(i); - Type *EltTy = ST->getElementType(i); + for (int I = Annotation->GetNumFields() - 1; I >= 0; I--) { + DxilFieldAnnotation &FieldAnnotation = Annotation->GetFieldAnnotation(I); + Type *EltTy = ST->getElementType(I); - unsigned offset = fieldAnnotation.GetCBufferOffset(); + unsigned Offset = FieldAnnotation.GetCBufferOffset(); unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( - fieldAnnotation, EltTy, typeSys); + FieldAnnotation, EltTy, TypeSys); - bool bOutOfBound = false; + bool IsOutOfBound = false; if (!EltTy->isAggregateType()) { - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { - if (constAllocator.Insert(&fieldAnnotation, base + offset, - base + offset + EltSize - 1)) { + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { + if (ConstAllocator.Insert(&FieldAnnotation, Base + Offset, + Base + Offset + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } else if (isa(EltTy)) { - if (((offset + 15) & ~(0xf)) != offset) { + if (((Offset + 15) & ~(0xf)) != Offset) { ValCtx.EmitFormatError(ValidationRule::SmCBufferArrayOffsetAlignment, - {cbName, std::to_string(offset)}); + {CbName, std::to_string(Offset)}); continue; } - unsigned arrayCount = 1; + unsigned ArrayCount = 1; while (isa(EltTy)) { - arrayCount *= EltTy->getArrayNumElements(); + ArrayCount *= EltTy->getArrayNumElements(); EltTy = EltTy->getArrayElementType(); } DxilStructAnnotation *EltAnnotation = nullptr; if (StructType *EltST = dyn_cast(EltTy)) - EltAnnotation = typeSys.GetStructAnnotation(EltST); + EltAnnotation = TypeSys.GetStructAnnotation(EltST); - unsigned alignedEltSize = ((EltSize + 15) & ~(0xf)); - unsigned 
arraySize = ((arrayCount - 1) * alignedEltSize) + EltSize; - bOutOfBound = (offset + arraySize) > cbSize; + unsigned AlignedEltSize = ((EltSize + 15) & ~(0xf)); + unsigned ArraySize = ((ArrayCount - 1) * AlignedEltSize) + EltSize; + IsOutOfBound = (Offset + ArraySize) > CbSize; - if (!bOutOfBound) { + if (!IsOutOfBound) { // If we didn't care about gaps where elements could be placed with user // offsets, we could: recurse once if EltAnnotation, then allocate the - // rest if arrayCount > 1 + // rest if ArrayCount > 1 - unsigned arrayBase = base + offset; + unsigned ArrayBase = Base + Offset; if (!EltAnnotation) { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, arrayBase, - arrayBase + arraySize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, ArrayBase, + ArrayBase + ArraySize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(arrayBase)}); + {CbName, std::to_string(ArrayBase)}); } } else { - for (unsigned idx = 0; idx < arrayCount; idx++) { - CollectCBufferRanges(EltAnnotation, constAllocator, arrayBase, - typeSys, cbName, ValCtx); - arrayBase += alignedEltSize; + for (unsigned Idx = 0; Idx < ArrayCount; Idx++) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, ArrayBase, + TypeSys, CbName, ValCtx); + ArrayBase += AlignedEltSize; } } } } else { StructType *EltST = cast(EltTy); - unsigned structBase = base + offset; - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { + unsigned StructBase = Base + Offset; + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { if (DxilStructAnnotation *EltAnnotation = - typeSys.GetStructAnnotation(EltST)) { - CollectCBufferRanges(EltAnnotation, constAllocator, structBase, - typeSys, cbName, ValCtx); + TypeSys.GetStructAnnotation(EltST)) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, StructBase, + TypeSys, CbName, ValCtx); } else { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, structBase, - structBase + EltSize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, StructBase, + StructBase + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(structBase)}); + {CbName, std::to_string(StructBase)}); } } } } - if (bOutOfBound) { + if (IsOutOfBound) { ValCtx.EmitFormatError(ValidationRule::SmCBufferElementOverflow, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } -static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) { - Type *Ty = cb.GetHLSLType()->getPointerElementType(); - if (cb.GetRangeSize() != 1 || Ty->isArrayTy()) { +static void ValidateCBuffer(DxilCBuffer &Cb, ValidationContext &ValCtx) { + Type *Ty = Cb.GetHLSLType()->getPointerElementType(); + if (Cb.GetRangeSize() != 1 || Ty->isArrayTy()) { Ty = Ty->getArrayElementType(); } if (!isa(Ty)) { - ValCtx.EmitResourceError(&cb, + ValCtx.EmitResourceError(&Cb, ValidationRule::SmCBufferTemplateTypeMustBeStruct); return; } - if (cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { - ValCtx.EmitResourceFormatError(&cb, ValidationRule::SmCBufferSize, - {std::to_string(cb.GetSize())}); + if (Cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { + ValCtx.EmitResourceFormatError(&Cb, ValidationRule::SmCBufferSize, + {std::to_string(Cb.GetSize())}); return; } StructType *ST = cast(Ty); - DxilTypeSystem &typeSys = ValCtx.DxilMod.GetTypeSystem(); - DxilStructAnnotation *annotation = typeSys.GetStructAnnotation(ST); - if (!annotation) + DxilTypeSystem &TypeSys = 
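
The cbuffer array arithmetic in this hunk follows the legacy constant-buffer layout: every array element except the last is padded to a 16-byte register boundary, and the whole array (at its field offset) must fit inside the declared cbuffer size. A small sketch of that math, assuming ElementCount is at least 1:

#include <cstdint>

struct ArrayLayout {
  uint64_t AlignedElementSize; // element size rounded up to 16 bytes
  uint64_t TotalSize;          // (count - 1) padded elements + unpadded tail
};

ArrayLayout LayoutCBufferArray(uint64_t ElementSize, uint64_t ElementCount) {
  ArrayLayout L;
  L.AlignedElementSize = (ElementSize + 15) & ~uint64_t(0xf);
  L.TotalSize = (ElementCount - 1) * L.AlignedElementSize + ElementSize;
  return L;
}

// Mirrors the SmCBufferElementOverflow bound applied above.
bool FitsInCBuffer(uint64_t Offset, uint64_t ElementSize, uint64_t ElementCount,
                   uint64_t CBufferSize) {
  return Offset + LayoutCBufferArray(ElementSize, ElementCount).TotalSize <=
         CBufferSize;
}
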
ValCtx.DxilMod.GetTypeSystem(); + DxilStructAnnotation *Annotation = TypeSys.GetStructAnnotation(ST); + if (!Annotation) return; // Collect constant ranges. - std::vector> constRanges; - SpanAllocator constAllocator( + std::vector> ConstRanges; + SpanAllocator ConstAllocator( 0, // 4096 * 16 bytes. DXIL::kMaxCBufferSize << 4); - CollectCBufferRanges(annotation, constAllocator, 0, typeSys, - ValCtx.GetResourceName(&cb), ValCtx); + CollectCBufferRanges(Annotation, ConstAllocator, 0, TypeSys, + ValCtx.GetResourceName(&Cb), ValCtx); } static void ValidateResources(ValidationContext &ValCtx) { - const vector> &uavs = ValCtx.DxilMod.GetUAVs(); - SpacesAllocator uavAllocator; + const vector> &Uavs = ValCtx.DxilMod.GetUAVs(); + SpacesAllocator UavAllocator; - for (auto &uav : uavs) { - if (uav->IsROV()) { + for (auto &Uav : Uavs) { + if (Uav->IsROV()) { if (!ValCtx.DxilMod.GetShaderModel()->IsPS() && !ValCtx.isLibProfile) { - ValCtx.EmitResourceError(uav.get(), ValidationRule::SmROVOnlyInPS); + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmROVOnlyInPS); } } - switch (uav->GetKind()) { + switch (Uav->GetKind()) { case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - ValCtx.EmitResourceError(uav.get(), + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmInvalidTextureKindOnUAV); break; default: break; } - if (uav->HasCounter() && !uav->IsStructuredBuffer()) { - ValCtx.EmitResourceError(uav.get(), + if (Uav->HasCounter() && !Uav->IsStructuredBuffer()) { + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (uav->HasCounter() && uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(uav.get(), + if (Uav->HasCounter() && Uav->IsGloballyCoherent()) + ValCtx.EmitResourceFormatError(Uav.get(), ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(uav.get())}); + {ValCtx.GetResourceName(Uav.get())}); - ValidateResource(*uav, ValCtx); - ValidateResourceOverlap(*uav, uavAllocator, ValCtx); + ValidateResource(*Uav, ValCtx); + ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); } - SpacesAllocator srvAllocator; - const vector> &srvs = ValCtx.DxilMod.GetSRVs(); - for (auto &srv : srvs) { + SpacesAllocator SrvAllocator; + const vector> &Srvs = ValCtx.DxilMod.GetSRVs(); + for (auto &srv : Srvs) { ValidateResource(*srv, ValCtx); - ValidateResourceOverlap(*srv, srvAllocator, ValCtx); + ValidateResourceOverlap(*srv, SrvAllocator, ValCtx); } - hlsl::DxilResourceBase *pNonDense; - if (!AreDxilResourcesDense(&ValCtx.M, &pNonDense)) { - ValCtx.EmitResourceError(pNonDense, ValidationRule::MetaDenseResIDs); + hlsl::DxilResourceBase *NonDenseRes; + if (!AreDxilResourcesDense(&ValCtx.M, &NonDenseRes)) { + ValCtx.EmitResourceError(NonDenseRes, ValidationRule::MetaDenseResIDs); } - SpacesAllocator samplerAllocator; + SpacesAllocator SamplerAllocator; for (auto &sampler : ValCtx.DxilMod.GetSamplers()) { if (sampler->GetSamplerKind() == DXIL::SamplerKind::Invalid) { ValCtx.EmitResourceError(sampler.get(), ValidationRule::MetaValidSamplerMode); } - ValidateResourceOverlap(*sampler, samplerAllocator, ValCtx); + ValidateResourceOverlap(*sampler, SamplerAllocator, ValCtx); } - SpacesAllocator cbufferAllocator; + SpacesAllocator CbufferAllocator; for (auto &cbuffer : ValCtx.DxilMod.GetCBuffers()) { ValidateCBuffer(*cbuffer, ValCtx); - ValidateResourceOverlap(*cbuffer, cbufferAllocator, ValCtx); + ValidateResourceOverlap(*cbuffer, CbufferAllocator, ValCtx); } } static void ValidateShaderFlags(ValidationContext &ValCtx) { - ShaderFlags 
calcFlags; - ValCtx.DxilMod.CollectShaderFlagsForModule(calcFlags); + ShaderFlags CalcFlags; + ValCtx.DxilMod.CollectShaderFlagsForModule(CalcFlags); // Special case for validator version prior to 1.8. // If DXR 1.1 flag is set, but our computed flags do not have this set, then // this is due to prior versions setting the flag based on DXR 1.1 subobjects, // which are gone by this point. Set the flag and the rest should match. - unsigned valMajor, valMinor; - ValCtx.DxilMod.GetValidatorVersion(valMajor, valMinor); - if (DXIL::CompareVersions(valMajor, valMinor, 1, 5) >= 0 && - DXIL::CompareVersions(valMajor, valMinor, 1, 8) < 0 && + unsigned ValMajor, ValMinor; + ValCtx.DxilMod.GetValidatorVersion(ValMajor, ValMinor); + if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 5) >= 0 && + DXIL::CompareVersions(ValMajor, ValMinor, 1, 8) < 0 && ValCtx.DxilMod.m_ShaderFlags.GetRaytracingTier1_1() && - !calcFlags.GetRaytracingTier1_1()) { - calcFlags.SetRaytracingTier1_1(true); + !CalcFlags.GetRaytracingTier1_1()) { + CalcFlags.SetRaytracingTier1_1(true); } - const uint64_t mask = ShaderFlags::GetShaderFlagsRawForCollection(); - uint64_t declaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); - uint64_t calcFlagsRaw = calcFlags.GetShaderFlagsRaw(); + const uint64_t Mask = ShaderFlags::GetShaderFlagsRawForCollection(); + uint64_t DeclaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); + uint64_t CalcFlagsRaw = CalcFlags.GetShaderFlagsRaw(); - declaredFlagsRaw &= mask; - calcFlagsRaw &= mask; + DeclaredFlagsRaw &= Mask; + CalcFlagsRaw &= Mask; - if (declaredFlagsRaw == calcFlagsRaw) { + if (DeclaredFlagsRaw == CalcFlagsRaw) { return; } ValCtx.EmitError(ValidationRule::MetaFlagsUsage); dxilutil::EmitNoteOnContext(ValCtx.M.getContext(), Twine("Flags declared=") + - Twine(declaredFlagsRaw) + Twine(", actual=") + - Twine(calcFlagsRaw)); + Twine(DeclaredFlagsRaw) + Twine(", actual=") + + Twine(CalcFlagsRaw)); } static void ValidateSignatureElement(DxilSignatureElement &SE, ValidationContext &ValCtx) { - DXIL::SemanticKind semanticKind = SE.GetSemantic()->GetKind(); - CompType::Kind compKind = SE.GetCompType().GetKind(); + DXIL::SemanticKind SemanticKind = SE.GetSemantic()->GetKind(); + CompType::Kind CompKind = SE.GetCompType().GetKind(); DXIL::InterpolationMode Mode = SE.GetInterpolationMode()->GetKind(); StringRef Name = SE.GetName(); @@ -4032,86 +4032,86 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSemanticLen); } - if (semanticKind > DXIL::SemanticKind::Arbitrary && - semanticKind < DXIL::SemanticKind::Invalid) { - if (semanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { + if (SemanticKind > DXIL::SemanticKind::Arbitrary && + SemanticKind < DXIL::SemanticKind::Invalid) { + if (SemanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { ValCtx.EmitFormatError(ValidationRule::MetaSemaKindMatchesName, {SE.GetName(), SE.GetSemantic()->GetName()}); } } - unsigned compWidth = 0; - bool compFloat = false; - bool compInt = false; - bool compBool = false; + unsigned CompWidth = 0; + bool CompFloat = false; + bool CompInt = false; + bool CompBool = false; - switch (compKind) { + switch (CompKind) { case CompType::Kind::U64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; case CompType::Kind::I64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; // These should be translated for signatures: // case CompType::Kind::PackedS8x32: // case 
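
The shader-flags comparison above only considers the bits the validator actually collects, so both the declared and the recomputed flag words are masked before the equality check (with the special pre-1.8 raytracing-tier carve-out applied first). A one-function sketch of that comparison, with CollectionMask standing in for ShaderFlags::GetShaderFlagsRawForCollection():

#include <cstdint>

// True when declared and computed flags agree on every collected bit,
// corresponding to the MetaFlagsUsage check above.
bool ShaderFlagsMatch(uint64_t DeclaredRaw, uint64_t ComputedRaw,
                      uint64_t CollectionMask) {
  return (DeclaredRaw & CollectionMask) == (ComputedRaw & CollectionMask);
}
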
CompType::Kind::PackedU8x32: case CompType::Kind::U32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::I32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::U16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I1: - compWidth = 1; - compBool = true; + CompWidth = 1; + CompBool = true; break; case CompType::Kind::F64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::F32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::F16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::SNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::SNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::SNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::UNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::UNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::UNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::Invalid: default: @@ -4120,7 +4120,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; } - if (compInt || compBool) { + if (CompInt || CompBool) { switch (Mode) { case DXIL::InterpolationMode::Linear: case DXIL::InterpolationMode::LinearCentroid: @@ -4137,91 +4137,91 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } // Elements that should not appear in the Dxil signature: - bool bAllowedInSig = true; - bool bShouldBeAllocated = true; + bool AllowedInSig = true; + bool ShouldBeAllocated = true; switch (SE.GetInterpretation()) { case DXIL::SemanticInterpretationKind::NA: case DXIL::SemanticInterpretationKind::NotInSig: case DXIL::SemanticInterpretationKind::Invalid: - bAllowedInSig = false; + AllowedInSig = false; LLVM_FALLTHROUGH; case DXIL::SemanticInterpretationKind::NotPacked: case DXIL::SemanticInterpretationKind::Shadow: - bShouldBeAllocated = false; + ShouldBeAllocated = false; break; default: break; } - const char *inputOutput = nullptr; + const char *InputOutput = nullptr; if (SE.IsInput()) - inputOutput = "Input"; + InputOutput = "Input"; else if (SE.IsOutput()) - inputOutput = "Output"; + InputOutput = "Output"; else - inputOutput = "PatchConstant"; + InputOutput = "PatchConstant"; - if (!bAllowedInSig) { + if (!AllowedInSig) { ValCtx.EmitFormatError(ValidationRule::SmSemantic, {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName(), - inputOutput}); - } else if (bShouldBeAllocated && !SE.IsAllocated()) { + InputOutput}); + } else if (ShouldBeAllocated && !SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated, - {inputOutput, SE.GetName()}); - } else if (!bShouldBeAllocated && SE.IsAllocated()) { + {InputOutput, SE.GetName()}); + } else if (!ShouldBeAllocated && SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldNotBeAllocated, - {inputOutput, SE.GetName()}); + {InputOutput, SE.GetName()}); } - bool bIsClipCull = false; 
- bool bIsTessfactor = false; - bool bIsBarycentric = false; + bool IsClipCull = false; + bool IsTessfactor = false; + bool IsBarycentric = false; - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Depth: case DXIL::SemanticKind::DepthGreaterEqual: case DXIL::SemanticKind::DepthLessEqual: - if (!compFloat || compWidth > 32 || SE.GetCols() != 1) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } break; case DXIL::SemanticKind::Coverage: - DXASSERT(!SE.IsInput() || !bAllowedInSig, + DXASSERT(!SE.IsInput() || !AllowedInSig, "else internal inconsistency between semantic interpretation " "table and validation code"); LLVM_FALLTHROUGH; case DXIL::SemanticKind::InnerCoverage: case DXIL::SemanticKind::OutputControlPointID: - if (compKind != CompType::Kind::U32 || SE.GetCols() != 1) { + if (CompKind != CompType::Kind::U32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::Position: - if (!compFloat || compWidth > 32 || SE.GetCols() != 4) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 4) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float4"}); } break; case DXIL::SemanticKind::Target: - if (compWidth > 32) { + if (CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float/int/uint"}); } break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: - bIsClipCull = true; - if (!compFloat || compWidth > 32) { + IsClipCull = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } // NOTE: clip cull distance size is checked at ValidateSignature. break; case DXIL::SemanticKind::IsFrontFace: { - if (!(compInt && compWidth == 32) || SE.GetCols() != 1) { + if (!(CompInt && CompWidth == 32) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } @@ -4235,14 +4235,14 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::SampleIndex: case DXIL::SemanticKind::StencilRef: case DXIL::SemanticKind::ShadingRate: - if ((compKind != CompType::Kind::U32 && compKind != CompType::Kind::U16) || + if ((CompKind != CompType::Kind::U32 && CompKind != CompType::Kind::U16) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::CullPrimitive: { - if (!(compBool && compWidth == 1) || SE.GetCols() != 1) { + if (!(CompBool && CompWidth == 1) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "bool"}); } @@ -4250,8 +4250,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::TessFactor: case DXIL::SemanticKind::InsideTessFactor: // NOTE: the size check is at CheckPatchConstantSemantic. 
- bIsTessfactor = true; - if (!compFloat || compWidth > 32) { + IsTessfactor = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4260,12 +4260,12 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; case DXIL::SemanticKind::DomainLocation: case DXIL::SemanticKind::Invalid: - DXASSERT(!bAllowedInSig, "else internal inconsistency between semantic " - "interpretation table and validation code"); + DXASSERT(!AllowedInSig, "else internal inconsistency between semantic " + "interpretation table and validation code"); break; case DXIL::SemanticKind::Barycentrics: - bIsBarycentric = true; - if (!compFloat || compWidth > 32) { + IsBarycentric = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4310,32 +4310,32 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } } - if (semanticKind == DXIL::SemanticKind::Target) { - // Verify packed row == semantic index - unsigned row = SE.GetStartRow(); + if (SemanticKind == DXIL::SemanticKind::Target) { + // Verify packed Row == semantic index + unsigned Row = SE.GetStartRow(); for (unsigned i : SE.GetSemanticIndexVec()) { - if (row != i) { + if (Row != i) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetIndexMatchesRow); } - ++row; + ++Row; } - // Verify packed col is 0 + // Verify packed Col is 0 if (SE.GetStartCol() != 0) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetCol0); } - // Verify max row used < 8 + // Verify max Row used < 8 if (SE.GetStartRow() + SE.GetRows() > 8) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {"SV_Target", "7"}); } - } else if (bAllowedInSig && semanticKind != DXIL::SemanticKind::Arbitrary) { - if (bIsBarycentric) { + } else if (AllowedInSig && SemanticKind != DXIL::SemanticKind::Arbitrary) { + if (IsBarycentric) { if (SE.GetSemanticStartIndex() > 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "1"}); } - } else if (!bIsClipCull && SE.GetSemanticStartIndex() > 0) { + } else if (!IsClipCull && SE.GetSemanticStartIndex() > 0) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "0"}); } @@ -4343,17 +4343,17 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, // with the exception of tessfactors, which are validated in // CheckPatchConstantSemantic and ClipDistance/CullDistance, which have // other custom constraints. - if (!bIsTessfactor && !bIsClipCull && SE.GetRows() > 1) { + if (!IsTessfactor && !IsClipCull && SE.GetRows() > 1) { ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSystemValueRows); } } if (SE.GetCols() + (SE.IsAllocated() ? 
SE.GetStartCol() : 0) > 4) { - unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols(); + unsigned Size = (SE.GetRows() - 1) * 4 + SE.GetCols(); ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange, {SE.GetName(), std::to_string(SE.GetStartRow()), std::to_string(SE.GetStartCol()), - std::to_string(size)}); + std::to_string(Size)}); } if (!SE.GetInterpolationMode()->IsValid()) { @@ -4362,8 +4362,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } static void ValidateSignatureOverlap(DxilSignatureElement &E, - unsigned maxScalars, - DxilSignatureAllocator &allocator, + unsigned MaxScalars, + DxilSignatureAllocator &Allocator, ValidationContext &ValCtx) { // Skip entries that are not or should not be allocated. Validation occurs in @@ -4381,16 +4381,16 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, break; } - DxilPackElement PE(&E, allocator.UseMinPrecision()); - DxilSignatureAllocator::ConflictType conflict = - allocator.DetectRowConflict(&PE, E.GetStartRow()); - if (conflict == DxilSignatureAllocator::kNoConflict || - conflict == DxilSignatureAllocator::kInsufficientFreeComponents) - conflict = - allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); - switch (conflict) { + DxilPackElement PE(&E, Allocator.UseMinPrecision()); + DxilSignatureAllocator::ConflictType Conflict = + Allocator.DetectRowConflict(&PE, E.GetStartRow()); + if (Conflict == DxilSignatureAllocator::kNoConflict || + Conflict == DxilSignatureAllocator::kInsufficientFreeComponents) + Conflict = + Allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); + switch (Conflict) { case DxilSignatureAllocator::kNoConflict: - allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); + Allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); break; case DxilSignatureAllocator::kConflictsWithIndexed: ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict, @@ -4452,59 +4452,59 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, } static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, - EntryStatus &Status, unsigned maxScalars) { - DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = { + EntryStatus &Status, unsigned MaxScalars) { + DxilSignatureAllocator Allocator[DXIL::kNumOutputStreams] = { {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}}; - unordered_set semanticUsageSet[DXIL::kNumOutputStreams]; - StringMap> semanticIndexMap[DXIL::kNumOutputStreams]; - unordered_set clipcullRowSet[DXIL::kNumOutputStreams]; - unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; + unordered_set SemanticUsageSet[DXIL::kNumOutputStreams]; + StringMap> SemanticIndexMap[DXIL::kNumOutputStreams]; + unordered_set ClipcullRowSet[DXIL::kNumOutputStreams]; + unsigned ClipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; - bool isOutput = S.IsOutput(); + bool IsOutput = S.IsOutput(); unsigned TargetMask = 0; DXIL::SemanticKind DepthKind = DXIL::SemanticKind::Invalid; - const InterpolationMode *prevBaryInterpMode = nullptr; - unsigned numBarycentrics = 0; + const InterpolationMode *PrevBaryInterpMode = nullptr; + unsigned NumBarycentrics = 0; for (auto &E : S.GetElements()) { - DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind(); + DXIL::SemanticKind SemanticKind = E->GetSemantic()->GetKind(); ValidateSignatureElement(*E, ValCtx); - // Avoid OOB indexing on streamId. 
- unsigned streamId = E->GetOutputStream(); - if (streamId >= DXIL::kNumOutputStreams || !isOutput || + // Avoid OOB indexing on StreamId. + unsigned StreamId = E->GetOutputStream(); + if (StreamId >= DXIL::kNumOutputStreams || !IsOutput || !ValCtx.DxilMod.GetShaderModel()->IsGS()) { - streamId = 0; + StreamId = 0; } // Semantic index overlap check, keyed by name. - std::string nameUpper(E->GetName()); - std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), + std::string NameUpper(E->GetName()); + std::transform(NameUpper.begin(), NameUpper.end(), NameUpper.begin(), ::toupper); - unordered_set &semIdxSet = semanticIndexMap[streamId][nameUpper]; - for (unsigned semIdx : E->GetSemanticIndexVec()) { - if (semIdxSet.count(semIdx) > 0) { + unordered_set &SemIdxSet = SemanticIndexMap[StreamId][NameUpper]; + for (unsigned SemIdx : E->GetSemanticIndexVec()) { + if (SemIdxSet.count(SemIdx) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap, - {E->GetName(), std::to_string(semIdx)}); + {E->GetName(), std::to_string(SemIdx)}); return; } else - semIdxSet.insert(semIdx); + SemIdxSet.insert(SemIdx); } // SV_Target has special rules - if (semanticKind == DXIL::SemanticKind::Target) { + if (SemanticKind == DXIL::SemanticKind::Target) { // Validate target overlap if (E->GetStartRow() + E->GetRows() <= 8) { - unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); - if (TargetMask & mask) { + unsigned Mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); + if (TargetMask & Mask) { ValCtx.EmitFormatError( ValidationRule::MetaNoSemanticOverlap, {"SV_Target", std::to_string(E->GetStartRow())}); } - TargetMask = TargetMask | mask; + TargetMask = TargetMask | Mask; } if (E->GetRows() > 1) { ValCtx.EmitSignatureError(E.get(), ValidationRule::SmNoPSOutputIdx); @@ -4516,19 +4516,19 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, continue; // validate system value semantic rules - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Arbitrary: break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: // Validate max 8 components across 2 rows (registers) - for (unsigned rowIdx = 0; rowIdx < E->GetRows(); rowIdx++) - clipcullRowSet[streamId].insert(E->GetStartRow() + rowIdx); - if (clipcullRowSet[streamId].size() > 2) { + for (unsigned RowIdx = 0; RowIdx < E->GetRows(); RowIdx++) + ClipcullRowSet[StreamId].insert(E->GetStartRow() + RowIdx); + if (ClipcullRowSet[StreamId].size() > 2) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxRows); } - clipcullComponents[streamId] += E->GetCols(); - if (clipcullComponents[streamId] > 8) { + ClipcullComponents[StreamId] += E->GetCols(); + if (ClipcullComponents[StreamId] > 8) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxComponents); } @@ -4540,58 +4540,58 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, ValCtx.EmitSignatureError(E.get(), ValidationRule::SmPSMultipleDepthSemantic); } - DepthKind = semanticKind; + DepthKind = SemanticKind; break; case DXIL::SemanticKind::Barycentrics: { // There can only be up to two SV_Barycentrics // with differeent perspective interpolation modes. 
- if (numBarycentrics++ > 1) { + if (NumBarycentrics++ > 1) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); break; } - const InterpolationMode *mode = E->GetInterpolationMode(); - if (prevBaryInterpMode) { - if ((mode->IsAnyNoPerspective() && - prevBaryInterpMode->IsAnyNoPerspective()) || - (!mode->IsAnyNoPerspective() && - !prevBaryInterpMode->IsAnyNoPerspective())) { + const InterpolationMode *Mode = E->GetInterpolationMode(); + if (PrevBaryInterpMode) { + if ((Mode->IsAnyNoPerspective() && + PrevBaryInterpMode->IsAnyNoPerspective()) || + (!Mode->IsAnyNoPerspective() && + !PrevBaryInterpMode->IsAnyNoPerspective())) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); } } - prevBaryInterpMode = mode; + PrevBaryInterpMode = Mode; break; } default: - if (semanticUsageSet[streamId].count( - static_cast(semanticKind)) > 0) { + if (SemanticUsageSet[StreamId].count( + static_cast(SemanticKind)) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue, {E->GetSemantic()->GetName()}); } - semanticUsageSet[streamId].insert(static_cast(semanticKind)); + SemanticUsageSet[StreamId].insert(static_cast(SemanticKind)); break; } // Packed element overlap check. - ValidateSignatureOverlap(*E.get(), maxScalars, allocator[streamId], ValCtx); + ValidateSignatureOverlap(*E.get(), MaxScalars, Allocator[StreamId], ValCtx); - if (isOutput && semanticKind == DXIL::SemanticKind::Position) { + if (IsOutput && SemanticKind == DXIL::SemanticKind::Position) { Status.hasOutputPosition[E->GetOutputStream()] = true; } } if (Status.hasViewID && S.IsInput() && ValCtx.DxilMod.GetShaderModel()->GetKind() == DXIL::ShaderKind::Pixel) { - // Ensure sufficient space for ViewID: - DxilSignatureAllocator::DummyElement viewID; - viewID.rows = 1; - viewID.cols = 1; - viewID.kind = DXIL::SemanticKind::Arbitrary; - viewID.interpolation = DXIL::InterpolationMode::Constant; - viewID.interpretation = DXIL::SemanticInterpretationKind::SGV; - allocator[0].PackNext(&viewID, 0, 32); - if (!viewID.IsAllocated()) { + // Ensure sufficient space for ViewId: + DxilSignatureAllocator::DummyElement ViewId; + ViewId.rows = 1; + ViewId.cols = 1; + ViewId.kind = DXIL::SemanticKind::Arbitrary; + ViewId.interpolation = DXIL::InterpolationMode::Constant; + ViewId.interpretation = DXIL::SemanticInterpretationKind::SGV; + Allocator[0].PackNext(&ViewId, 0, 32); + if (!ViewId.IsAllocated()) { ValCtx.EmitError(ValidationRule::SmViewIDNeedsSlot); } } @@ -4616,12 +4616,12 @@ static void ValidateConstantInterpModeSignature(ValidationContext &ValCtx, } static void ValidateEntrySignatures(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function &F) { - const DxilFunctionProps &props = entryProps.props; - const DxilEntrySignature &S = entryProps.sig; + const DxilFunctionProps &Props = EntryProps.props; + const DxilEntrySignature &S = EntryProps.sig; - if (props.IsRay()) { + if (Props.IsRay()) { // No signatures allowed if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || @@ -4631,62 +4631,62 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate payload/attribute/params sizes - unsigned payloadSize = 0; - unsigned attrSize = 0; - auto itPayload = F.arg_begin(); - auto itAttr = itPayload; - if (itAttr != F.arg_end()) - itAttr++; + unsigned PayloadSize = 0; + unsigned AttrSize = 0; + auto ItPayload = F.arg_begin(); + auto ItAttr = ItPayload; + if 
(ItAttr != F.arg_end()) + ItAttr++; DataLayout DL(F.getParent()); - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - if (itAttr != F.arg_end()) { - Type *Ty = itAttr->getType(); + if (ItAttr != F.arg_end()) { + Type *Ty = ItAttr->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - attrSize = + AttrSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } LLVM_FALLTHROUGH; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - if (itPayload != F.arg_end()) { - Type *Ty = itPayload->getType(); + if (ItPayload != F.arg_end()) { + Type *Ty = ItPayload->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - payloadSize = + PayloadSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } break; } - if (props.ShaderProps.Ray.payloadSizeInBytes < payloadSize) { + if (Props.ShaderProps.Ray.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmRayShaderPayloadSize, - {F.getName(), props.IsCallable() ? "params" : "payload"}); + {F.getName(), Props.IsCallable() ? "params" : "payload"}); } - if (props.ShaderProps.Ray.attributeSizeInBytes < attrSize) { + if (Props.ShaderProps.Ray.attributeSizeInBytes < AttrSize) { ValCtx.EmitFnFormatError(&F, ValidationRule::SmRayShaderPayloadSize, {F.getName(), "attribute"}); } return; } - bool isPS = props.IsPS(); - bool isVS = props.IsVS(); - bool isGS = props.IsGS(); - bool isCS = props.IsCS(); - bool isMS = props.IsMS(); + bool IsPs = Props.IsPS(); + bool IsVs = Props.IsVS(); + bool IsGs = Props.IsGS(); + bool IsCs = Props.IsCS(); + bool IsMs = Props.IsMS(); - if (isPS) { + if (IsPs) { // PS output no interp mode. ValidateNoInterpModeSignature(ValCtx, S.OutputSignature); - } else if (isVS) { + } else if (IsVs) { // VS input no interp mode. ValidateNoInterpModeSignature(ValCtx, S.InputSignature); } - if (isMS) { + if (IsMs) { // primitive output constant interp mode. 
ValidateConstantInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } else { @@ -4694,38 +4694,38 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, ValidateNoInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } - unsigned maxInputScalars = DXIL::kMaxInputTotalScalars; - unsigned maxOutputScalars = 0; - unsigned maxPatchConstantScalars = 0; + unsigned MaxInputScalars = DXIL::kMaxInputTotalScalars; + unsigned MaxOutputScalars = 0; + unsigned MaxPatchConstantScalars = 0; - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::Compute: break; case DXIL::ShaderKind::Vertex: case DXIL::ShaderKind::Geometry: case DXIL::ShaderKind::Pixel: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Hull: case DXIL::ShaderKind::Domain: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; break; case DXIL::ShaderKind::Mesh: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Amplification: default: break; } - ValidateSignature(ValCtx, S.InputSignature, Status, maxInputScalars); - ValidateSignature(ValCtx, S.OutputSignature, Status, maxOutputScalars); + ValidateSignature(ValCtx, S.InputSignature, Status, MaxInputScalars); + ValidateSignature(ValCtx, S.OutputSignature, Status, MaxOutputScalars); ValidateSignature(ValCtx, S.PatchConstOrPrimSignature, Status, - maxPatchConstantScalars); + MaxPatchConstantScalars); - if (isPS) { + if (IsPs) { // Gather execution information. hlsl::PSExecutionInfo PSExec; DxilSignatureElement *PosInterpSE = nullptr; @@ -4767,10 +4767,10 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate PS output semantic. 
- const DxilSignature &outputSig = S.OutputSignature; - for (auto &SE : outputSig.GetElements()) { - Semantic::Kind semanticKind = SE->GetSemantic()->GetKind(); - switch (semanticKind) { + const DxilSignature &OutputSig = S.OutputSignature; + for (auto &SE : OutputSig.GetElements()) { + Semantic::Kind SemanticKind = SE->GetSemantic()->GetKind(); + switch (SemanticKind) { case Semantic::Kind::Target: case Semantic::Kind::Coverage: case Semantic::Kind::Depth: @@ -4786,24 +4786,24 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isGS) { - unsigned maxVertexCount = props.ShaderProps.GS.maxVertexCount; - unsigned outputScalarCount = 0; - const DxilSignature &outSig = S.OutputSignature; - for (auto &SE : outSig.GetElements()) { - outputScalarCount += SE->GetRows() * SE->GetCols(); + if (IsGs) { + unsigned MaxVertexCount = Props.ShaderProps.GS.maxVertexCount; + unsigned OutputScalarCount = 0; + const DxilSignature &OutSig = S.OutputSignature; + for (auto &SE : OutSig.GetElements()) { + OutputScalarCount += SE->GetRows() * SE->GetCols(); } - unsigned totalOutputScalars = maxVertexCount * outputScalarCount; - if (totalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { + unsigned TotalOutputScalars = MaxVertexCount * OutputScalarCount; + if (TotalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmGSTotalOutputVertexDataRange, - {std::to_string(maxVertexCount), std::to_string(outputScalarCount), - std::to_string(totalOutputScalars), + {std::to_string(MaxVertexCount), std::to_string(OutputScalarCount), + std::to_string(TotalOutputScalars), std::to_string(DXIL::kMaxGSOutputTotalScalars)}); } } - if (isCS) { + if (IsCs) { if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || !S.PatchConstOrPrimSignature.GetElements().empty()) { @@ -4811,7 +4811,7 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isMS) { + if (IsMs) { unsigned VertexSignatureRows = S.OutputSignature.GetRowCount(); if (VertexSignatureRows > DXIL::kMaxMSVSigRows) { ValCtx.EmitFnFormatError( @@ -4833,31 +4833,31 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, const unsigned kScalarSizeForMSAttributes = 4; #define ALIGN32(n) (((n) + 31) & ~31) - unsigned maxAlign32VertexCount = - ALIGN32(props.ShaderProps.MS.maxVertexCount); - unsigned maxAlign32PrimitiveCount = - ALIGN32(props.ShaderProps.MS.maxPrimitiveCount); - unsigned totalOutputScalars = 0; + unsigned MaxAlign32VertexCount = + ALIGN32(Props.ShaderProps.MS.maxVertexCount); + unsigned MaxAlign32PrimitiveCount = + ALIGN32(Props.ShaderProps.MS.maxPrimitiveCount); + unsigned TotalOutputScalars = 0; for (auto &SE : S.OutputSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32VertexCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32VertexCount; } for (auto &SE : S.PatchConstOrPrimSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32PrimitiveCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32PrimitiveCount; } - if (totalOutputScalars * kScalarSizeForMSAttributes > + if (TotalOutputScalars * kScalarSizeForMSAttributes > DXIL::kMaxMSOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderOutputSize, {F.getName(), std::to_string(DXIL::kMaxMSOutputTotalBytes)}); } - unsigned totalInputOutputBytes = - totalOutputScalars * kScalarSizeForMSAttributes + - props.ShaderProps.MS.payloadSizeInBytes; - 
if (totalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { + unsigned TotalInputOutputBytes = + TotalOutputScalars * kScalarSizeForMSAttributes + + Props.ShaderProps.MS.payloadSizeInBytes; + if (TotalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderInOutSize, {F.getName(), std::to_string(DXIL::kMaxMSInputOutputTotalBytes)}); @@ -4870,9 +4870,9 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntrySignatures(ValCtx, entryProps, Status, F); + ValidateEntrySignatures(ValCtx, EntryProps, Status, F); } } } else { @@ -4883,8 +4883,8 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntrySignatures(ValCtx, entryProps, Status, *Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntrySignatures(ValCtx, EntryProps, Status, *Entry); } } @@ -4893,14 +4893,14 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { struct CompatibilityChecker { ValidationContext &ValCtx; Function *EntryFn; - const DxilFunctionProps &props; - DXIL::ShaderKind shaderKind; + const DxilFunctionProps &Props; + DXIL::ShaderKind ShaderKind; // These masks identify the potential conflict flags based on the entry // function's shader kind and properties when either UsesDerivatives or // RequiresGroup flags are set in ShaderCompatInfo. - uint32_t maskForDeriv = 0; - uint32_t maskForGroup = 0; + uint32_t MaskForDeriv = 0; + uint32_t MaskForGroup = 0; enum class ConflictKind : uint32_t { Stage, @@ -4922,77 +4922,77 @@ struct CompatibilityChecker { CompatibilityChecker(ValidationContext &ValCtx, Function *EntryFn) : ValCtx(ValCtx), EntryFn(EntryFn), - props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), - shaderKind(props.shaderKind) { + Props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), + ShaderKind(Props.shaderKind) { // Precompute potential incompatibilities based on shader stage, shader kind // and entry attributes. These will turn into full conflicts if the entry // point's shader flags indicate that they use relevant features. if (!ValCtx.DxilMod.GetShaderModel()->IsSM66Plus() && - (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute)) { - maskForDeriv |= + (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute)) { + MaskForDeriv |= static_cast(ConflictFlags::DerivInComputeShaderModel); - } else if (shaderKind == DXIL::ShaderKind::Node) { + } else if (ShaderKind == DXIL::ShaderKind::Node) { // Only broadcasting launch supports derivatives. - if (props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) - maskForDeriv |= static_cast(ConflictFlags::DerivLaunch); + if (Props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) + MaskForDeriv |= static_cast(ConflictFlags::DerivLaunch); // Thread launch node has no group. 
- if (props.Node.LaunchType == DXIL::NodeLaunchType::Thread) - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + if (Props.Node.LaunchType == DXIL::NodeLaunchType::Thread) + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } - if (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Node) { + if (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Node) { // All compute-like stages // Thread dimensions must be either 1D and X is multiple of 4, or 2D // and X and Y must be multiples of 2. - if (props.numThreads[1] == 1 && props.numThreads[2] == 1) { - if ((props.numThreads[0] & 0x3) != 0) - maskForDeriv |= + if (Props.numThreads[1] == 1 && Props.numThreads[2] == 1) { + if ((Props.numThreads[0] & 0x3) != 0) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); - } else if ((props.numThreads[0] & 0x1) || (props.numThreads[1] & 0x1)) - maskForDeriv |= + } else if ((Props.numThreads[0] & 0x1) || (Props.numThreads[1] & 0x1)) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); } else { // other stages have no group - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } } uint32_t - IdentifyConflict(const DxilModule::ShaderCompatInfo &compatInfo) const { - uint32_t conflictMask = 0; + IdentifyConflict(const DxilModule::ShaderCompatInfo &CompatInfo) const { + uint32_t ConflictMask = 0; // Compatibility check said this shader kind is not compatible. - if (0 == ((1 << (uint32_t)shaderKind) & compatInfo.mask)) - conflictMask |= (uint32_t)ConflictFlags::Stage; + if (0 == ((1 << (uint32_t)ShaderKind) & CompatInfo.mask)) + ConflictMask |= (uint32_t)ConflictFlags::Stage; // Compatibility check said this shader model is not compatible. if (DXIL::CompareVersions(ValCtx.DxilMod.GetShaderModel()->GetMajor(), ValCtx.DxilMod.GetShaderModel()->GetMinor(), - compatInfo.minMajor, compatInfo.minMinor) < 0) - conflictMask |= (uint32_t)ConflictFlags::ShaderModel; + CompatInfo.minMajor, CompatInfo.minMinor) < 0) + ConflictMask |= (uint32_t)ConflictFlags::ShaderModel; - if (compatInfo.shaderFlags.GetUsesDerivatives()) - conflictMask |= maskForDeriv; + if (CompatInfo.shaderFlags.GetUsesDerivatives()) + ConflictMask |= MaskForDeriv; - if (compatInfo.shaderFlags.GetRequiresGroup()) - conflictMask |= maskForGroup; + if (CompatInfo.shaderFlags.GetRequiresGroup()) + ConflictMask |= MaskForGroup; - return conflictMask; + return ConflictMask; } - void Diagnose(Function *F, uint32_t conflictMask, ConflictKind conflict, - ValidationRule rule, ArrayRef args = {}) { - if (conflictMask & (1 << (unsigned)conflict)) - ValCtx.EmitFnFormatError(F, rule, args); + void Diagnose(Function *F, uint32_t ConflictMask, ConflictKind Conflict, + ValidationRule Rule, ArrayRef Args = {}) { + if (ConflictMask & (1 << (unsigned)Conflict)) + ValCtx.EmitFnFormatError(F, Rule, Args); } - void DiagnoseConflicts(Function *F, uint32_t conflictMask) { + void DiagnoseConflicts(Function *F, uint32_t ConflictMask) { // Emit a diagnostic indicating that either the entry function or a function // called by the entry function contains a disallowed operation. 
if (F == EntryFn) @@ -5001,22 +5001,22 @@ struct CompatibilityChecker { ValCtx.EmitFnError(EntryFn, ValidationRule::SmIncompatibleCallInEntry); // Emit diagnostics for each conflict found in this function. - Diagnose(F, conflictMask, ConflictKind::Stage, + Diagnose(F, ConflictMask, ConflictKind::Stage, ValidationRule::SmIncompatibleStage, - {ShaderModel::GetKindName(props.shaderKind)}); - Diagnose(F, conflictMask, ConflictKind::ShaderModel, + {ShaderModel::GetKindName(Props.shaderKind)}); + Diagnose(F, ConflictMask, ConflictKind::ShaderModel, ValidationRule::SmIncompatibleShaderModel); - Diagnose(F, conflictMask, ConflictKind::DerivLaunch, + Diagnose(F, ConflictMask, ConflictKind::DerivLaunch, ValidationRule::SmIncompatibleDerivLaunch, - {GetLaunchTypeStr(props.Node.LaunchType)}); - Diagnose(F, conflictMask, ConflictKind::DerivThreadGroupDim, + {GetLaunchTypeStr(Props.Node.LaunchType)}); + Diagnose(F, ConflictMask, ConflictKind::DerivThreadGroupDim, ValidationRule::SmIncompatibleThreadGroupDim, - {std::to_string(props.numThreads[0]), - std::to_string(props.numThreads[1]), - std::to_string(props.numThreads[2])}); - Diagnose(F, conflictMask, ConflictKind::DerivInComputeShaderModel, + {std::to_string(Props.numThreads[0]), + std::to_string(Props.numThreads[1]), + std::to_string(Props.numThreads[2])}); + Diagnose(F, ConflictMask, ConflictKind::DerivInComputeShaderModel, ValidationRule::SmIncompatibleDerivInComputeShaderModel); - Diagnose(F, conflictMask, ConflictKind::RequiresGroup, + Diagnose(F, ConflictMask, ConflictKind::RequiresGroup, ValidationRule::SmIncompatibleRequiresGroup); } @@ -5025,59 +5025,59 @@ struct CompatibilityChecker { // functions called by that function introduced the conflict. // In those cases, the called functions themselves will emit the diagnostic. // Return conflict mask for this function. - uint32_t Visit(Function *F, uint32_t &remainingMask, - llvm::SmallPtrSet &visited, CallGraph &CG) { + uint32_t Visit(Function *F, uint32_t &RemainingMask, + llvm::SmallPtrSet &Visited, CallGraph &CG) { // Recursive check looks for where a conflict is found and not present // in functions called by the current function. // - When a source is found, emit diagnostics and clear the conflict // flags introduced by this function from the working mask so we don't // report this conflict again. - // - When the remainingMask is 0, we are done. + // - When the RemainingMask is 0, we are done. - if (remainingMask == 0) + if (RemainingMask == 0) return 0; // Nothing left to search for. - if (!visited.insert(F).second) + if (!Visited.insert(F).second) return 0; // Already visited. - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = ValCtx.DxilMod.GetCompatInfoForFunction(F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) return 0; - uint32_t maskForThisFunction = IdentifyConflict(*compatInfo); + uint32_t MaskForThisFunction = IdentifyConflict(*CompatInfo); - uint32_t maskForCalls = 0; + uint32_t MaskForCalls = 0; if (CallGraphNode *CGNode = CG[F]) { for (auto &Call : *CGNode) { Function *called = Call.second->getFunction(); if (called->isDeclaration()) continue; - maskForCalls |= Visit(called, remainingMask, visited, CG); - if (remainingMask == 0) + MaskForCalls |= Visit(called, RemainingMask, Visited, CG); + if (RemainingMask == 0) return 0; // Nothing left to search for. 
} } // Mask of incompatibilities introduced by this function. - uint32_t conflictsIntroduced = - remainingMask & maskForThisFunction & ~maskForCalls; - if (conflictsIntroduced) { + uint32_t ConflictsIntroduced = + RemainingMask & MaskForThisFunction & ~MaskForCalls; + if (ConflictsIntroduced) { // This function introduces at least one conflict. - DiagnoseConflicts(F, conflictsIntroduced); + DiagnoseConflicts(F, ConflictsIntroduced); // Mask off diagnosed incompatibilities. - remainingMask &= ~conflictsIntroduced; + RemainingMask &= ~ConflictsIntroduced; } - return maskForThisFunction; + return MaskForThisFunction; } - void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &compatInfo) { - uint32_t conflictMask = IdentifyConflict(compatInfo); - if (conflictMask == 0) + void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &CompatInfo) { + uint32_t ConflictMask = IdentifyConflict(CompatInfo); + if (ConflictMask == 0) return; CallGraph &CG = ValCtx.GetCallGraph(); - llvm::SmallPtrSet visited; - Visit(EntryFn, conflictMask, visited, CG); + llvm::SmallPtrSet Visited; + Visit(EntryFn, ConflictMask, Visited, CG); } }; @@ -5086,14 +5086,14 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { DxilModule &DM = ValCtx.DxilMod; for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = DM.GetCompatInfoForFunction(&F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) continue; CompatibilityChecker checker(ValCtx, &F); - checker.FindIncompatibleCall(*compatInfo); + checker.FindIncompatibleCall(*CompatInfo); } } } @@ -5101,101 +5101,101 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { static void CheckPatchConstantSemantic(ValidationContext &ValCtx, const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = EntryProps.props; - bool isHS = props.IsHS(); + const DxilFunctionProps &Props = EntryProps.props; + bool IsHs = Props.IsHS(); - DXIL::TessellatorDomain domain = - isHS ? props.ShaderProps.HS.domain : props.ShaderProps.DS.domain; + DXIL::TessellatorDomain Domain = + IsHs ? 
Props.ShaderProps.HS.domain : Props.ShaderProps.DS.domain; - const DxilSignature &patchConstantSig = + const DxilSignature &PatchConstantSig = EntryProps.sig.PatchConstOrPrimSignature; - const unsigned kQuadEdgeSize = 4; - const unsigned kQuadInsideSize = 2; - const unsigned kQuadDomainLocSize = 2; + const unsigned KQuadEdgeSize = 4; + const unsigned KQuadInsideSize = 2; + const unsigned KQuadDomainLocSize = 2; - const unsigned kTriEdgeSize = 3; - const unsigned kTriInsideSize = 1; - const unsigned kTriDomainLocSize = 3; + const unsigned KTriEdgeSize = 3; + const unsigned KTriInsideSize = 1; + const unsigned KTriDomainLocSize = 3; - const unsigned kIsolineEdgeSize = 2; - const unsigned kIsolineInsideSize = 0; - const unsigned kIsolineDomainLocSize = 3; + const unsigned KIsolineEdgeSize = 2; + const unsigned KIsolineInsideSize = 0; + const unsigned KIsolineDomainLocSize = 3; - const char *domainName = ""; + const char *DomainName = ""; DXIL::SemanticKind kEdgeSemantic = DXIL::SemanticKind::TessFactor; - unsigned edgeSize = 0; + unsigned EdgeSize = 0; DXIL::SemanticKind kInsideSemantic = DXIL::SemanticKind::InsideTessFactor; - unsigned insideSize = 0; + unsigned InsideSize = 0; Status.domainLocSize = 0; - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - domainName = "IsoLine"; - edgeSize = kIsolineEdgeSize; - insideSize = kIsolineInsideSize; - Status.domainLocSize = kIsolineDomainLocSize; + DomainName = "IsoLine"; + EdgeSize = KIsolineEdgeSize; + InsideSize = KIsolineInsideSize; + Status.domainLocSize = KIsolineDomainLocSize; break; case DXIL::TessellatorDomain::Tri: - domainName = "Tri"; - edgeSize = kTriEdgeSize; - insideSize = kTriInsideSize; - Status.domainLocSize = kTriDomainLocSize; + DomainName = "Tri"; + EdgeSize = KTriEdgeSize; + InsideSize = KTriInsideSize; + Status.domainLocSize = KTriDomainLocSize; break; case DXIL::TessellatorDomain::Quad: - domainName = "Quad"; - edgeSize = kQuadEdgeSize; - insideSize = kQuadInsideSize; - Status.domainLocSize = kQuadDomainLocSize; + DomainName = "Quad"; + EdgeSize = KQuadEdgeSize; + InsideSize = KQuadInsideSize; + Status.domainLocSize = KQuadDomainLocSize; break; default: // Don't bother with other tests if domain is invalid return; } - bool bFoundEdgeSemantic = false; - bool bFoundInsideSemantic = false; - for (auto &SE : patchConstantSig.GetElements()) { - Semantic::Kind kind = SE->GetSemantic()->GetKind(); - if (kind == kEdgeSemantic) { - bFoundEdgeSemantic = true; - if (SE->GetRows() != edgeSize || SE->GetCols() > 1) { + bool FoundEdgeSemantic = false; + bool FoundInsideSemantic = false; + for (auto &SE : PatchConstantSig.GetElements()) { + Semantic::Kind Kind = SE->GetSemantic()->GetKind(); + if (Kind == kEdgeSemantic) { + FoundEdgeSemantic = true; + if (SE->GetRows() != EdgeSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), - std::to_string(SE->GetCols()), domainName, - std::to_string(edgeSize)}); + std::to_string(SE->GetCols()), DomainName, + std::to_string(EdgeSize)}); } - } else if (kind == kInsideSemantic) { - bFoundInsideSemantic = true; - if (SE->GetRows() != insideSize || SE->GetCols() > 1) { + } else if (Kind == kInsideSemantic) { + FoundInsideSemantic = true; + if (SE->GetRows() != InsideSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::SmInsideTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), std::to_string(SE->GetCols()), - domainName, std::to_string(insideSize)}); + DomainName, 
std::to_string(InsideSize)}); } } } - if (isHS) { - if (!bFoundEdgeSemantic) { + if (IsHs) { + if (!FoundEdgeSemantic) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } - if (!bFoundInsideSemantic && domain != DXIL::TessellatorDomain::IsoLine) { + if (!FoundInsideSemantic && Domain != DXIL::TessellatorDomain::IsoLine) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } } } static void ValidatePassThruHS(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { + const DxilEntryProps &EntryProps, Function *F) { // Check pass thru HS. if (F->isDeclaration()) { - const auto &props = entryProps.props; - if (props.IsHS()) { - const auto &HS = props.ShaderProps.HS; + const auto &Props = EntryProps.props; + if (Props.IsHS()) { + const auto &HS = Props.ShaderProps.HS; if (HS.inputControlPoints < HS.outputControlPoints) { ValCtx.EmitFnError( F, ValidationRule::SmHullPassThruControlPointCountMatch); @@ -5203,12 +5203,12 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // Check declared control point outputs storage amounts are ok to pass // through (less output storage than input for control points). - const DxilSignature &outSig = entryProps.sig.OutputSignature; - unsigned totalOutputCPScalars = 0; - for (auto &SE : outSig.GetElements()) { - totalOutputCPScalars += SE->GetRows() * SE->GetCols(); + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + unsigned TotalOutputCpScalars = 0; + for (auto &SE : OutSig.GetElements()) { + TotalOutputCpScalars += SE->GetRows() * SE->GetCols(); } - if (totalOutputCPScalars * HS.outputControlPoints > + if (TotalOutputCpScalars * HS.outputControlPoints > DXIL::kMaxHSOutputControlPointsTotalScalars) { ValCtx.EmitFnError(F, ValidationRule::SmOutputControlPointsTotalScalars); @@ -5223,35 +5223,35 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // validate wave size (currently allowed only on CS and node shaders but might // be supported on other shader types in the future) static void ValidateWaveSize(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { - const DxilFunctionProps &props = entryProps.props; - const hlsl::DxilWaveSize &waveSize = props.WaveSize; + const DxilEntryProps &EntryProps, Function *F) { + const DxilFunctionProps &Props = EntryProps.props; + const hlsl::DxilWaveSize &WaveSize = Props.WaveSize; - switch (waveSize.Validate()) { + switch (WaveSize.Validate()) { case hlsl::DxilWaveSize::ValidationResult::Success: break; case hlsl::DxilWaveSize::ValidationResult::InvalidMin: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Min", std::to_string(waveSize.Min), + {"Min", std::to_string(WaveSize.Min), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidMax: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Max", std::to_string(waveSize.Max), + {"Max", std::to_string(WaveSize.Max), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidPreferred: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Preferred", std::to_string(waveSize.Preferred), + {"Preferred", std::to_string(WaveSize.Preferred), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxOrPreferredWhenUndefined: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeAllZeroWhenUndefined, - 
{std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxEqualsMin: // This case is allowed because users may disable the ErrorDefault warning. @@ -5259,227 +5259,227 @@ static void ValidateWaveSize(ValidationContext &ValCtx, case hlsl::DxilWaveSize::ValidationResult::PreferredWhenNoRange: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxAndPreferredZeroWhenNoRange, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxLessThanMin: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxGreaterThanMin, - {std::to_string(waveSize.Max), std::to_string(waveSize.Min)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Min)}); break; case hlsl::DxilWaveSize::ValidationResult::PreferredOutOfRange: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizePreferredInRange, - {std::to_string(waveSize.Preferred), - std::to_string(waveSize.Min), - std::to_string(waveSize.Max)}); + {std::to_string(WaveSize.Preferred), + std::to_string(WaveSize.Min), + std::to_string(WaveSize.Max)}); break; } // Check shader model and kind. - if (waveSize.IsDefined()) { - if (!props.IsCS() && !props.IsNode()) { + if (WaveSize.IsDefined()) { + if (!Props.IsCS() && !Props.IsNode()) { ValCtx.EmitFnError(F, ValidationRule::SmWaveSizeOnComputeOrNode); } } } static void ValidateEntryProps(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = entryProps.props; - DXIL::ShaderKind ShaderType = props.shaderKind; + const DxilFunctionProps &Props = EntryProps.props; + DXIL::ShaderKind ShaderType = Props.shaderKind; - ValidateWaveSize(ValCtx, entryProps, F); + ValidateWaveSize(ValCtx, EntryProps, F); - if (ShaderType == DXIL::ShaderKind::Compute || props.IsNode()) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + if (ShaderType == DXIL::ShaderKind::Compute || Props.IsNode()) { + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinCSThreadGroupX) || (x > DXIL::kMaxCSThreadGroupX)) { + if ((X < DXIL::kMinCSThreadGroupX) || (X > DXIL::kMaxCSThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinCSThreadGroupX), std::to_string(DXIL::kMaxCSThreadGroupX)}); } - if ((y < DXIL::kMinCSThreadGroupY) || (y > DXIL::kMaxCSThreadGroupY)) { + if ((Y < DXIL::kMinCSThreadGroupY) || (Y > DXIL::kMaxCSThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinCSThreadGroupY), std::to_string(DXIL::kMaxCSThreadGroupY)}); } - if ((z < DXIL::kMinCSThreadGroupZ) || (z > DXIL::kMaxCSThreadGroupZ)) { + if ((Z < DXIL::kMinCSThreadGroupZ) || (Z > DXIL::kMaxCSThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinCSThreadGroupZ), std::to_string(DXIL::kMaxCSThreadGroupZ)}); } - if (threadsInGroup > 
DXIL::kMaxCSThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxCSThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxCSThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. } else if (ShaderType == DXIL::ShaderKind::Mesh) { - const auto &MS = props.ShaderProps.MS; - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + const auto &MS = Props.ShaderProps.MS; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
- unsigned maxVertexCount = MS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxMSOutputVertexCount) { + unsigned MaxVertexCount = MS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxMSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMeshShaderMaxVertexCount, {std::to_string(DXIL::kMaxMSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned maxPrimitiveCount = MS.maxPrimitiveCount; - if (maxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { + unsigned MaxPrimitiveCount = MS.maxPrimitiveCount; + if (MaxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderMaxPrimitiveCount, {std::to_string(DXIL::kMaxMSOutputPrimitiveCount), - std::to_string(maxPrimitiveCount)}); + std::to_string(MaxPrimitiveCount)}); } } else if (ShaderType == DXIL::ShaderKind::Amplification) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
} else if (ShaderType == DXIL::ShaderKind::Domain) { - const auto &DS = props.ShaderProps.DS; - DXIL::TessellatorDomain domain = DS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = DS.inputControlPoints; + const auto &DS = Props.ShaderProps.DS; + DXIL::TessellatorDomain Domain = DS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = DS.inputControlPoints; - if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmDSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Hull) { - const auto &HS = props.ShaderProps.HS; - DXIL::TessellatorDomain domain = HS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = HS.inputControlPoints; - if (inputControlPointCount == 0) { - const DxilSignature &inputSig = entryProps.sig.InputSignature; - if (!inputSig.GetElements().empty()) { + const auto &HS = Props.ShaderProps.HS; + DXIL::TessellatorDomain Domain = HS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = HS.inputControlPoints; + if (InputControlPointCount == 0) { + const DxilSignature &InputSig = EntryProps.sig.InputSignature; + if (!InputSig.GetElements().empty()) { ValCtx.EmitFnError(F, ValidationRule::SmZeroHSInputControlPointWithInput); } - } else if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + } else if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmHSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - unsigned outputControlPointCount = HS.outputControlPoints; - if (outputControlPointCount < DXIL::kMinIAPatchControlPointCount || - outputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + unsigned OutputControlPointCount = HS.outputControlPoints; + if (OutputControlPointCount < DXIL::kMinIAPatchControlPointCount || + OutputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmOutputControlPointCountRange, {std::to_string(DXIL::kMinIAPatchControlPointCount), std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(outputControlPointCount)}); + std::to_string(OutputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - DXIL::TessellatorPartitioning partition = HS.partition; - if (partition == DXIL::TessellatorPartitioning::Undefined) { + DXIL::TessellatorPartitioning Partition = HS.partition; + if (Partition == DXIL::TessellatorPartitioning::Undefined) { 
ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorPartition); } - DXIL::TessellatorOutputPrimitive tessOutputPrimitive = HS.outputPrimitive; - if (tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || - tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { + DXIL::TessellatorOutputPrimitive TessOutputPrimitive = HS.outputPrimitive; + if (TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || + TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorOutputPrimitive); } - float maxTessFactor = HS.maxTessFactor; - if (maxTessFactor < DXIL::kHSMaxTessFactorLowerBound || - maxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { + float MaxTessFactor = HS.maxTessFactor; + if (MaxTessFactor < DXIL::kHSMaxTessFactorLowerBound || + MaxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { ValCtx.EmitFnFormatError( F, ValidationRule::MetaMaxTessFactor, {std::to_string(DXIL::kHSMaxTessFactorLowerBound), std::to_string(DXIL::kHSMaxTessFactorUpperBound), - std::to_string(maxTessFactor)}); + std::to_string(MaxTessFactor)}); } // Domain and OutPrimivtive match. - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::TriangleCW: case DXIL::TessellatorOutputPrimitive::TriangleCCW: ValCtx.EmitFnError(F, ValidationRule::SmIsoLineOutputPrimitiveMismatch); @@ -5489,7 +5489,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Tri: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5498,7 +5498,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Quad: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5511,39 +5511,39 @@ static void ValidateEntryProps(ValidationContext &ValCtx, break; } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Geometry) { - const auto &GS = props.ShaderProps.GS; - unsigned maxVertexCount = GS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxGSOutputVertexCount) { + const auto &GS = Props.ShaderProps.GS; + unsigned MaxVertexCount = GS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxGSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSOutputVertexCountRange, {std::to_string(DXIL::kMaxGSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned instanceCount = GS.instanceCount; - if (instanceCount > DXIL::kMaxGSInstanceCount || instanceCount < 1) { + unsigned InstanceCount = GS.instanceCount; + if (InstanceCount > DXIL::kMaxGSInstanceCount || InstanceCount < 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSInstanceCountRange, {std::to_string(DXIL::kMaxGSInstanceCount), - std::to_string(instanceCount)}); + std::to_string(InstanceCount)}); } - DXIL::PrimitiveTopology topo = DXIL::PrimitiveTopology::Undefined; - bool bTopoMismatch = false; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + DXIL::PrimitiveTopology Topo = 
DXIL::PrimitiveTopology::Undefined; + bool TopoMismatch = false; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - if (topo == DXIL::PrimitiveTopology::Undefined) - topo = GS.streamPrimitiveTopologies[i]; - else if (topo != GS.streamPrimitiveTopologies[i]) { - bTopoMismatch = true; + if (Topo == DXIL::PrimitiveTopology::Undefined) + Topo = GS.streamPrimitiveTopologies[I]; + else if (Topo != GS.streamPrimitiveTopologies[I]) { + TopoMismatch = true; break; } } } - if (bTopoMismatch) - topo = DXIL::PrimitiveTopology::Undefined; - switch (topo) { + if (TopoMismatch) + Topo = DXIL::PrimitiveTopology::Undefined; + switch (Topo) { case DXIL::PrimitiveTopology::PointList: case DXIL::PrimitiveTopology::LineStrip: case DXIL::PrimitiveTopology::TriangleStrip: @@ -5553,9 +5553,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; } - DXIL::InputPrimitive inputPrimitive = GS.inputPrimitive; - unsigned VertexCount = GetNumVertices(inputPrimitive); - if (VertexCount == 0 && inputPrimitive != DXIL::InputPrimitive::Undefined) { + DXIL::InputPrimitive InputPrimitive = GS.inputPrimitive; + unsigned VertexCount = GetNumVertices(InputPrimitive); + if (VertexCount == 0 && InputPrimitive != DXIL::InputPrimitive::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmGSValidInputPrimitive); } } @@ -5566,10 +5566,10 @@ static void ValidateShaderState(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntryProps(ValCtx, entryProps, Status, &F); - ValidatePassThruHS(ValCtx, entryProps, &F); + ValidateEntryProps(ValCtx, EntryProps, Status, &F); + ValidatePassThruHS(ValCtx, EntryProps, &F); } } } else { @@ -5580,33 +5580,33 @@ static void ValidateShaderState(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntryProps(ValCtx, entryProps, Status, Entry); - ValidatePassThruHS(ValCtx, entryProps, Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntryProps(ValCtx, EntryProps, Status, Entry); + ValidatePassThruHS(ValCtx, EntryProps, Entry); } } static CallGraphNode * -CalculateCallDepth(CallGraphNode *node, - std::unordered_map &depthMap, - std::unordered_set &callStack, - std::unordered_set &funcSet) { - unsigned depth = callStack.size(); - funcSet.insert(node->getFunction()); - for (auto it = node->begin(), ei = node->end(); it != ei; it++) { - CallGraphNode *toNode = it->second; - if (callStack.insert(toNode).second == false) { +CalculateCallDepth(CallGraphNode *Node, + std::unordered_map &DepthMap, + std::unordered_set &CallStack, + std::unordered_set &FuncSet) { + unsigned Depth = CallStack.size(); + FuncSet.insert(Node->getFunction()); + for (auto It = Node->begin(), EIt = Node->end(); It != EIt; It++) { + CallGraphNode *ToNode = It->second; + if (CallStack.insert(ToNode).second == false) { // Recursive. 
- return toNode; + return ToNode; } - if (depthMap[toNode] < depth) - depthMap[toNode] = depth; + if (DepthMap[ToNode] < Depth) + DepthMap[ToNode] = Depth; if (CallGraphNode *N = - CalculateCallDepth(toNode, depthMap, callStack, funcSet)) { + CalculateCallDepth(ToNode, DepthMap, CallStack, FuncSet)) { // Recursive return N; } - callStack.erase(toNode); + CallStack.erase(ToNode); } return nullptr; @@ -5616,29 +5616,29 @@ static void ValidateCallGraph(ValidationContext &ValCtx) { // Build CallGraph. CallGraph &CG = ValCtx.GetCallGraph(); - std::unordered_map depthMap; - std::unordered_set callStack; - CallGraphNode *entryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; - depthMap[entryNode] = 0; - if (CallGraphNode *N = CalculateCallDepth(entryNode, depthMap, callStack, + std::unordered_map DepthMap; + std::unordered_set CallStack; + CallGraphNode *EntryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; + DepthMap[EntryNode] = 0; + if (CallGraphNode *N = CalculateCallDepth(EntryNode, DepthMap, CallStack, ValCtx.entryFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); if (ValCtx.DxilMod.GetShaderModel()->IsHS()) { - CallGraphNode *patchConstantNode = + CallGraphNode *PatchConstantNode = CG[ValCtx.DxilMod.GetPatchConstantFunction()]; - depthMap[patchConstantNode] = 0; - callStack.clear(); + DepthMap[PatchConstantNode] = 0; + CallStack.clear(); if (CallGraphNode *N = - CalculateCallDepth(patchConstantNode, depthMap, callStack, + CalculateCallDepth(PatchConstantNode, DepthMap, CallStack, ValCtx.patchConstFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); } } static void ValidateFlowControl(ValidationContext &ValCtx) { - bool reducible = + bool Reducible = IsReducible(*ValCtx.DxilMod.GetModule(), IrreducibilityAction::Ignore); - if (!reducible) { + if (!Reducible) { ValCtx.EmitError(ValidationRule::FlowReducible); return; } @@ -5653,28 +5653,28 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { DominatorTree DT = DTA.run(F); LoopInfo LI; LI.Analyze(DT); - for (auto loopIt = LI.begin(); loopIt != LI.end(); loopIt++) { - Loop *loop = *loopIt; - SmallVector exitBlocks; - loop->getExitBlocks(exitBlocks); - if (exitBlocks.empty()) + for (auto LoopIt = LI.begin(); LoopIt != LI.end(); LoopIt++) { + Loop *Loop = *LoopIt; + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + if (ExitBlocks.empty()) ValCtx.EmitFnError(&F, ValidationRule::FlowDeadLoop); } // validate that there is no use of a value that has been output-completed // for this function. - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { - Function *pF = it.second; + for (auto &It : HlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function *pF = It.second; if (!pF) continue; // first, collect all the output complete calls that are not dominated // by another OutputComplete call for the same handle value llvm::SmallMapVector, 4> - handleToCI; + HandleToCI; for (User *U : pF->users()) { // all OutputComplete calls are instructions, and call instructions, // so there shouldn't need to be a null check. 
@@ -5686,33 +5686,33 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { continue; DxilInst_OutputComplete OutputComplete(CI); - Value *completedRecord = OutputComplete.get_output(); + Value *CompletedRecord = OutputComplete.get_output(); - auto vIt = handleToCI.find(completedRecord); - if (vIt == handleToCI.end()) { + auto vIt = HandleToCI.find(CompletedRecord); + if (vIt == HandleToCI.end()) { llvm::SmallPtrSet s; s.insert(CI); - handleToCI.insert(std::make_pair(completedRecord, s)); + HandleToCI.insert(std::make_pair(CompletedRecord, s)); } else { // if the handle is already in the map, make sure the map's set of // output complete calls that dominate the handle and do not dominate // each other gets updated if necessary bool CI_is_dominated = false; - for (auto ocIt = vIt->second.begin(); ocIt != vIt->second.end();) { + for (auto OcIt = vIt->second.begin(); OcIt != vIt->second.end();) { // if our new OC CI dominates an OC instruction in the set, // then replace the instruction in the set with the new OC CI. - if (DT.dominates(CI, *ocIt)) { - auto cur_it = ocIt++; + if (DT.dominates(CI, *OcIt)) { + auto cur_it = OcIt++; vIt->second.erase(*cur_it); continue; } // Remember if our new CI gets dominated by any CI in the set. - if (DT.dominates(*ocIt, CI)) { + if (DT.dominates(*OcIt, CI)) { CI_is_dominated = true; break; } - ocIt++; + OcIt++; } // if no CI in the set dominates our new CI, // the new CI should be added to the set @@ -5721,14 +5721,14 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { } } - for (auto handle_iter = handleToCI.begin(), e = handleToCI.end(); + for (auto handle_iter = HandleToCI.begin(), e = HandleToCI.end(); handle_iter != e; handle_iter++) { for (auto user_itr = handle_iter->first->user_begin(); user_itr != handle_iter->first->user_end(); user_itr++) { User *pU = *user_itr; - Instruction *useInstr = cast(pU); - if (useInstr) { - if (CallInst *CI = dyn_cast(useInstr)) { + Instruction *UseInstr = cast(pU); + if (UseInstr) { + if (CallInst *CI = dyn_cast(UseInstr)) { // if the user is an output complete call that is in the set of // OutputComplete calls not dominated by another OutputComplete // call for the same handle value, no diagnostics need to be @@ -5739,15 +5739,15 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { // make sure any output complete call in the set // that dominates this use gets its diagnostic emitted. 
- for (auto ocIt = handle_iter->second.begin(); - ocIt != handle_iter->second.end(); ocIt++) { - Instruction *ocInstr = cast(*ocIt); - if (DT.dominates(ocInstr, useInstr)) { + for (auto OcIt = handle_iter->second.begin(); + OcIt != handle_iter->second.end(); OcIt++) { + Instruction *OcInstr = cast(*OcIt); + if (DT.dominates(OcInstr, UseInstr)) { ValCtx.EmitInstrError( - useInstr, + UseInstr, ValidationRule::InstrNodeRecordHandleUseAfterComplete); ValCtx.EmitInstrNote( - *ocIt, "record handle invalidated by OutputComplete"); + *OcIt, "record handle invalidated by OutputComplete"); break; } } @@ -5763,57 +5763,57 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { static void ValidateUninitializedOutput(ValidationContext &ValCtx, Function *F) { DxilModule &DM = ValCtx.DxilMod; - DxilEntryProps &entryProps = DM.GetDxilEntryProps(F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); EntryStatus &Status = ValCtx.GetEntryStatus(F); - const DxilFunctionProps &props = entryProps.props; + const DxilFunctionProps &Props = EntryProps.props; // For HS only need to check Tessfactor which is in patch constant sig. - if (props.IsHS()) { - std::vector &patchConstOrPrimCols = Status.patchConstOrPrimCols; - const DxilSignature &patchConstSig = - entryProps.sig.PatchConstOrPrimSignature; - for (auto &E : patchConstSig.GetElements()) { - unsigned mask = patchConstOrPrimCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + if (Props.IsHS()) { + std::vector &PatchConstOrPrimCols = Status.patchConstOrPrimCols; + const DxilSignature &PatchConstSig = + EntryProps.sig.PatchConstOrPrimSignature; + for (auto &E : PatchConstSig.GetElements()) { + unsigned Mask = PatchConstOrPrimCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. - if (mask != requireMask && !E->GetSemantic()->IsArbitrary()) { + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary()) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } return; } - const DxilSignature &outSig = entryProps.sig.OutputSignature; - std::vector &outputCols = Status.outputCols; - for (auto &E : outSig.GetElements()) { - unsigned mask = outputCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + std::vector &OutputCols = Status.outputCols; + for (auto &E : OutSig.GetElements()) { + unsigned Mask = OutputCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. 
-    if (mask != requireMask && !E->GetSemantic()->IsArbitrary() &&
+    if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary() &&
         E->GetSemantic()->GetKind() != Semantic::Kind::Target) {
       ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput,
                                {E->GetName()});
     }
   }

-  if (!props.IsGS()) {
-    unsigned posMask = Status.OutputPositionMask[0];
-    if (posMask != 0xf && Status.hasOutputPosition[0]) {
+  if (!Props.IsGS()) {
+    unsigned PosMask = Status.OutputPositionMask[0];
+    if (PosMask != 0xf && Status.hasOutputPosition[0]) {
       ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition);
     }
   } else {
-    const auto &GS = props.ShaderProps.GS;
-    unsigned streamMask = 0;
-    for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) {
-      if (GS.streamPrimitiveTopologies[i] !=
+    const auto &GS = Props.ShaderProps.GS;
+    unsigned StreamMask = 0;
+    for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) {
+      if (GS.streamPrimitiveTopologies[I] !=
           DXIL::PrimitiveTopology::Undefined) {
-        streamMask |= 1 << i;
+        StreamMask |= 1 << I;
       }
     }
-    for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) {
-      if (streamMask & (1 << i)) {
-        unsigned posMask = Status.OutputPositionMask[i];
-        if (posMask != 0xf && Status.hasOutputPosition[i]) {
+    for (unsigned I = 0; I < DXIL::kNumOutputStreams; I++) {
+      if (StreamMask & (1 << I)) {
+        unsigned PosMask = Status.OutputPositionMask[I];
+        if (PosMask != 0xf && Status.hasOutputPosition[I]) {
           ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition);
         }
       }

From 0ffd60accba540b0127e727f68b61b8075d6130a Mon Sep 17 00:00:00 2001
From: Greg Roth
Date: Fri, 4 Apr 2025 13:10:28 -0700
Subject: [PATCH 71/88] [SM6.9] Native vector load/store lowering (#7292)

Enables declaring long vector types for raw buffers and lowering loads and
stores of those and of traditional vectors to new DXIL ops that keep the
native vector types, along with validation and testing support for the same.

Allow declaring long vector raw buffer resources. Previously disallowed
along with other global types, this provides a mechanism for indicating
which buffers are raw and allowing them to contain long vectors, while
continuing to produce an error for other resource types, as verified by
existing tests.

Introduce native vector DXIL load/store intrinsics. Add new raw buffer
vector load/store intrinsics using the new vector overload types. Include
them in the validation associated with similar loads/stores.

Lower native vector raw buffer loads/stores into the new ops. When the
loaded or stored type is a vector with more than one element, the shader
model is 6.9 or higher, and the operation is on a raw buffer, generate a
native vector raw buffer load or store. Incidentally removes an unused
parameter in load translation and refactors the lowering to flow better
with the new resret types.

Add validation and compute shader tests.

Add a vector-to-scalar raw buffer load/store lowering pass. Native vector
loads and stores are generated for 6.9 targets and above, which includes
the 6.x target used when compiling to libraries. This adds a pass, run when
linking, that lowers the vector operations to scalar operations for shader
models without native vector support, so libraries compiled for shader
models that support native vectors can be linked into targets that don't.

Validate that native vector loads and stores have properly defined
parameters of the correct types. Add tests for both the vector loads/stores
and the original scalar loads/stores since they share a lot of validation
code.
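For illustration, a minimal sketch of the kind of HLSL this change targets.
The buffer name, binding, element type, vector length, and byte offsets here
are hypothetical, chosen only to mirror the patterns exercised by the new
tests below:

    // On SM 6.9+, the load and store below keep the whole <8 x float> value
    // and lower to the new dx.op.rawBufferVectorLoad/rawBufferVectorStore
    // ops; when a library is linked for a target without native vector
    // support, the new pass breaks such ops back down into scalar
    // rawBufferLoad/rawBufferStore calls in chunks of up to four components.
    RWByteAddressBuffer Buf : register(u0);

    [shader("compute")]
    [numthreads(1, 1, 1)]
    void main(uint tid : SV_GroupIndex) {
      // 8 floats = 32 bytes per element, so index by byte offset tid * 32.
      vector<float, 8> v = Buf.Load<vector<float, 8> >(tid * 32);
      Buf.Store<vector<float, 8> >(tid * 32, v * 2.0f);
    }
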
Fixes #7118 --- include/dxc/DXIL/DxilConstants.h | 28 +- include/dxc/DXIL/DxilInstructions.h | 93 +++ include/dxc/HLSL/DxilGenerationPass.h | 2 + lib/DXIL/DxilOperations.cpp | 46 +- lib/DxilValidation/DxilValidation.cpp | 87 ++- lib/HLSL/CMakeLists.txt | 1 + lib/HLSL/DxilLinker.cpp | 4 + lib/HLSL/DxilScalarizeVectorLoadStores.cpp | 231 ++++++ lib/HLSL/HLOperationLower.cpp | 72 +- tools/clang/lib/Sema/SemaHLSL.cpp | 2 +- .../intrinsics/buffer-load-stores-sm69.hlsl | 91 +++ .../hlsl/types/longvec-operators-cs.hlsl | 719 ++++++++++++++++++ .../types/longvec-operators-vec1s-cs.hlsl | 680 +++++++++++++++++ .../hlsl/types/longvec-operators-vec1s.hlsl | 62 +- .../hlsl/types/longvec-operators.hlsl | 18 - .../longvec-load-stores-scalarizevecldst.ll | 478 ++++++++++++ .../DXILValidation/load-store-validation.hlsl | 74 ++ .../DXILValidation/vector-validation.hlsl | 14 + .../load-store-validation.ll | 229 ++++++ .../LitDXILValidation/vector-validation.ll | 78 ++ .../hlsl/types/invalid-longvecs-sm68.hlsl | 2 + tools/clang/unittests/HLSL/ValidationTest.cpp | 26 +- utils/hct/hctdb.py | 96 ++- 23 files changed, 2991 insertions(+), 142 deletions(-) create mode 100644 lib/HLSL/DxilScalarizeVectorLoadStores.cpp create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll create mode 100644 tools/clang/test/DXILValidation/load-store-validation.hlsl create mode 100644 tools/clang/test/DXILValidation/vector-validation.hlsl create mode 100644 tools/clang/test/LitDXILValidation/load-store-validation.ll create mode 100644 tools/clang/test/LitDXILValidation/vector-validation.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 447728300b..4f8c521851 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -898,8 +898,11 @@ enum class OpCode : unsigned { GetDimensions = 72, // gets texture size information RawBufferLoad = 139, // reads from a raw buffer and structured buffer RawBufferStore = 140, // writes to a RWByteAddressBuffer or RWStructuredBuffer - TextureLoad = 66, // reads texel data without any filtering or sampling - TextureStore = 67, // reads texel data without any filtering or sampling + RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer + RawBufferVectorStore = + 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer + TextureLoad = 66, // reads texel data without any filtering or sampling + TextureStore = 67, // reads texel data without any filtering or sampling TextureStoreSample = 225, // stores texel data at specified sample index // Sampler Feedback @@ -1044,7 +1047,7 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes = 303 // exclusive last value of enumeration + NumOpCodes = 305 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1278,6 +1281,8 @@ enum class OpCodeClass : unsigned { GetDimensions, RawBufferLoad, RawBufferStore, + RawBufferVectorLoad, + RawBufferVectorStore, TextureLoad, TextureStore, TextureStoreSample, @@ -1356,7 +1361,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 177 // exclusive last value of enumeration + NumOpClasses = 179 // exclusive last value 
of enumeration }; // OPCODECLASS-ENUM:END @@ -1415,6 +1420,12 @@ const unsigned kRawBufferLoadElementOffsetOpIdx = 3; const unsigned kRawBufferLoadMaskOpIdx = 4; const unsigned kRawBufferLoadAlignmentOpIdx = 5; +// RawBufferVectorLoad. +const unsigned kRawBufferVectorLoadHandleOpIdx = 1; +const unsigned kRawBufferVectorLoadIndexOpIdx = 2; +const unsigned kRawBufferVectorLoadElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorLoadAlignmentOpIdx = 4; + // RawBufferStore const unsigned kRawBufferStoreHandleOpIdx = 1; const unsigned kRawBufferStoreIndexOpIdx = 2; @@ -1424,7 +1435,14 @@ const unsigned kRawBufferStoreVal1OpIdx = 5; const unsigned kRawBufferStoreVal2OpIdx = 6; const unsigned kRawBufferStoreVal3OpIdx = 7; const unsigned kRawBufferStoreMaskOpIdx = 8; -const unsigned kRawBufferStoreAlignmentOpIdx = 8; +const unsigned kRawBufferStoreAlignmentOpIdx = 9; + +// RawBufferVectorStore +const unsigned kRawBufferVectorStoreHandleOpIdx = 1; +const unsigned kRawBufferVectorStoreIndexOpIdx = 2; +const unsigned kRawBufferVectorStoreElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorStoreValOpIdx = 4; +const unsigned kRawBufferVectorStoreAlignmentOpIdx = 5; // TextureStore. const unsigned kTextureStoreHandleOpIdx = 1; diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index f8d9ae77f3..6ee22869a5 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8923,5 +8923,98 @@ struct DxilInst_HitObject_MakeNop { // Metadata bool requiresUniformInputs() const { return false; } }; + +/// This instruction reads from a raw buffer and structured buffer +struct DxilInst_RawBufferVectorLoad { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorLoad(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorLoad); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_buf = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_alignment = 4, + }; + // Accessors + llvm::Value *get_buf() const { return Instr->getOperand(1); } + void set_buf(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(4); } + void set_alignment(llvm::Value *val) { Instr->setOperand(4, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(4)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(4, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer +struct DxilInst_RawBufferVectorStore { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorStore(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, 
hlsl::OP::OpCode::RawBufferVectorStore); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_uav = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_value0 = 4, + arg_alignment = 5, + }; + // Accessors + llvm::Value *get_uav() const { return Instr->getOperand(1); } + void set_uav(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_value0() const { return Instr->getOperand(4); } + void set_value0(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(5); } + void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/HLSL/DxilGenerationPass.h b/include/dxc/HLSL/DxilGenerationPass.h index c77ddab3d0..9df93e9232 100644 --- a/include/dxc/HLSL/DxilGenerationPass.h +++ b/include/dxc/HLSL/DxilGenerationPass.h @@ -81,6 +81,7 @@ ModulePass *createResumePassesPass(); FunctionPass *createMatrixBitcastLowerPass(); ModulePass *createDxilCleanupAddrSpaceCastPass(); ModulePass *createDxilRenameResourcesPass(); +ModulePass *createDxilScalarizeVectorLoadStoresPass(); void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry &); void initializeDxilAllocateResourcesForLibPass(llvm::PassRegistry &); @@ -115,6 +116,7 @@ void initializeResumePassesPass(llvm::PassRegistry &); void initializeMatrixBitcastLowerPassPass(llvm::PassRegistry &); void initializeDxilCleanupAddrSpaceCastPass(llvm::PassRegistry &); void initializeDxilRenameResourcesPass(llvm::PassRegistry &); +void initializeDxilScalarizeVectorLoadStoresPass(llvm::PassRegistry &); ModulePass *createDxilValidateWaveSensitivityPass(); void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry &); diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 56cdd0d04f..0b4c7218d4 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2633,6 +2633,24 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v + + // Resources + {OC::RawBufferVectorLoad, + "RawBufferVectorLoad", + OCC::RawBufferVectorLoad, + "rawBufferVectorLoad", + Attribute::ReadOnly, + 1, + {{0x4e7}}, + {{0xe7}}}, // Overloads: hfwidlgetNumParams() <= 4) return nullptr; return FT->getParamType(4); @@ -6134,7 +6173,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TextureGatherRaw: case OpCode::SampleCmpLevel: case OpCode::SampleCmpGrad: - case OpCode::SampleCmpBias: { + case OpCode::SampleCmpBias: + case OpCode::RawBufferVectorLoad: { StructType *ST = cast(Ty); return ST->getElementType(0); } diff --git a/lib/DxilValidation/DxilValidation.cpp 
b/lib/DxilValidation/DxilValidation.cpp index 97bde6ca24..a788f21d4e 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1475,34 +1475,35 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, } } } break; - case DXIL::OpCode::RawBufferLoad: { + case DXIL::OpCode::RawBufferLoad: if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferLoad, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferLoad BufLd(CI); + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorLoad: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadHandleOpIdx); DXIL::ComponentType CompTy; DXIL::ResourceClass ResClass; DXIL::ResourceKind ResKind = - GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); if (ResClass != DXIL::ResourceClass::SRV && - ResClass != DXIL::ResourceClass::UAV) { + ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); - } - Value *Offset = BufLd.get_elementOffset(); - Value *Align = BufLd.get_alignment(); - unsigned AlignSize = 0; - if (!isa(Align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - AlignSize = BufLd.get_alignment_val(); - } + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferLoadAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorLoad == Opcode) + AlignIdx = DXIL::OperandIndex::kRawBufferVectorLoadAlignmentOpIdx; + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx); switch (ResKind) { case DXIL::ResourceKind::RawBuffer: if (!isa(Offset)) { @@ -1526,38 +1527,44 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferStore, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferStore BufSt(CI); - DXIL::ComponentType CompTy; - DXIL::ResourceClass ResClass; - DXIL::ResourceKind ResKind = - GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - - if (ResClass != DXIL::ResourceClass::UAV) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); - } - - ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + DxilInst_RawBufferStore bufSt(CI); + ConstantInt *Mask = dyn_cast(bufSt.get_mask()); unsigned StValMask = - StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), - BufSt.get_value2(), BufSt.get_value3()}); + StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), + bufSt.get_value2(), bufSt.get_value3()}); if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; + } + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorStore: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); - Value *Offset = 
BufSt.get_elementOffset(); - Value *Align = BufSt.get_alignment(); - unsigned AlignSize = 0; - if (!isa(Align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - AlignSize = BufSt.get_alignment_val(); + if (ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); + + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferStoreAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorStore == Opcode) { + AlignIdx = DXIL::OperandIndex::kRawBufferVectorStoreAlignmentOpIdx; + unsigned ValueIx = DXIL::OperandIndex::kRawBufferVectorStoreValOpIdx; + if (isa(CI->getOperand(ValueIx))) + ValCtx.EmitInstrError(CI, + ValidationRule::InstrUndefinedValueForUAVStore); } + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx); switch (ResKind) { case DXIL::ResourceKind::RawBuffer: if (!isa(Offset)) { @@ -1684,6 +1691,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: + case DXIL::OpCode::RawBufferVectorLoad: + case DXIL::OpCode::RawBufferVectorStore: ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. diff --git a/lib/HLSL/CMakeLists.txt b/lib/HLSL/CMakeLists.txt index 947fc4c14f..21bb9523a7 100644 --- a/lib/HLSL/CMakeLists.txt +++ b/lib/HLSL/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_library(LLVMHLSL DxilNoops.cpp DxilPreserveAllOutputs.cpp DxilRenameResourcesPass.cpp + DxilScalarizeVectorLoadStores.cpp DxilSimpleGVNHoist.cpp DxilSignatureValidation.cpp DxilTargetLowering.cpp diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index ca343662ab..75d1bf78e9 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1247,6 +1247,10 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createDxilReinsertNopsPass()); PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false)); + // If we need SROA and dynamicindexvector to array, + // do it early to allow following scalarization to go forward. + PM.add(createDxilScalarizeVectorLoadStoresPass()); + // Remove unused functions. PM.add(createDxilDeadFunctionEliminationPass()); diff --git a/lib/HLSL/DxilScalarizeVectorLoadStores.cpp b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp new file mode 100644 index 0000000000..febcf32358 --- /dev/null +++ b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilScalarizeVectorLoadStores.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Lowers native vector load stores to potentially multiple scalar calls. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DXIL/DxilModule.h" +#include "dxc/HLSL/DxilGenerationPass.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +using namespace llvm; +using namespace hlsl; + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); + +class DxilScalarizeVectorLoadStores : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {} + + StringRef getPassName() const override { + return "DXIL scalarize vector load/stores"; + } + + bool runOnModule(Module &M) override { + DxilModule &DM = M.GetOrCreateDxilModule(); + // Shader Model 6.9 allows native vectors and doesn't need this pass. + if (DM.GetShaderModel()->IsSM69Plus()) + return false; + + bool Changed = false; + + hlsl::OP *HlslOP = DM.GetOP(); + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorLoad)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorStore)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorStore(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + return Changed; + } +}; + +static unsigned GetRawBufferMask(unsigned NumComponents) { + switch (NumComponents) { + case 0: + return 0; + case 1: + return DXIL::kCompMask_X; + case 2: + return DXIL::kCompMask_X | DXIL::kCompMask_Y; + case 3: + return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; + case 4: + default: + return DXIL::kCompMask_All; + } + return DXIL::kCompMask_All; +} + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorLoad VecLd(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferLoad; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecLd.get_buf()); // Resource handle @1. + Args.emplace_back(VecLd.get_index()); // Index @2. + Args.emplace_back(VecLd.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Mask to be set later @4. + Args.emplace_back(VecLd.get_alignment()); // Alignment @5. + + // Set offset to increment depending on whether the real offset is defined. + unsigned OffsetIdx; + if (isa(VecLd.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. 
+ OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + StructType *ResRetTy = cast(CI->getType()); + Type *Ty = ResRetTy->getElementType(0); + unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + + const unsigned MaxElemCount = 4; + SmallVector Elts(NumComponents); + Value *Ld = nullptr; + for (unsigned EIx = 0; EIx < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + HlslOP->GetI8Const(GetRawBufferMask(ChunkSize)); + // If we've loaded a chunk already, update offset to next chunk. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(OpCode)); + for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++) + Elts[EIx] = Builder.CreateExtractValue(Ld, ChIx); + } + + Value *RetValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++) + RetValNew = Builder.CreateInsertElement(RetValNew, Elts[ElIx], ElIx); + + // Replace users of the vector extracted from the vector load resret. + Value *Status = nullptr; + for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) { + auto EV = cast(*(CU++)); + unsigned Ix = EV->getIndices()[0]; + if (Ix == 0) { + // Handle value uses. + EV->replaceAllUsesWith(RetValNew); + } else if (Ix == 1) { + // Handle status uses. + if (!Status) + Status = Builder.CreateExtractValue(Ld, DXIL::kResRetStatusIndex); + EV->replaceAllUsesWith(Status); + } + EV->eraseFromParent(); + } + CI->eraseFromParent(); +} + +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorStore VecSt(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferStore; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecSt.get_uav()); // Resource handle @1. + Args.emplace_back(VecSt.get_index()); // Index @2. + Args.emplace_back(VecSt.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Val0 to be set later @4. + Args.emplace_back(nullptr); // Val1 to be set later @5. + Args.emplace_back(nullptr); // Val2 to be set later @6. + Args.emplace_back(nullptr); // Val3 to be set later @7. + Args.emplace_back(nullptr); // Mask to be set later @8. + Args.emplace_back(VecSt.get_alignment()); // Alignment @9. + + // Set offset to increment depending on whether the real offset is defined. + unsigned OffsetIdx; + if (isa(VecSt.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. 
+ OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + Value *VecVal = VecSt.get_value0(); + + const unsigned MaxElemCount = 4; + Type *Ty = VecVal->getType(); + const unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + Value *UndefVal = UndefValue::get(EltTy); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + for (unsigned EIx = 0; EIx < NumComponents;) { + // Store 4 elements or however many less than 4 are left to store. + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + // Populate all value arguments either with the vector or undefs. + uint8_t Mask = 0; + unsigned ChIx = 0; + for (; ChIx < ChunkSize; ChIx++, EIx++) { + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = + Builder.CreateExtractElement(VecVal, EIx); + Mask |= (1 << ChIx); + } + for (; ChIx < MaxElemCount; ChIx++) + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = UndefVal; + + Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = + HlslOP->GetU8Const(Mask); + Builder.CreateCall(F, Args); + } + CI->eraseFromParent(); +} + +char DxilScalarizeVectorLoadStores::ID = 0; + +ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() { + return new DxilScalarizeVectorLoadStores(); +} + +INITIALIZE_PASS(DxilScalarizeVectorLoadStores, + "hlsl-dxil-scalarize-vector-load-stores", + "DXIL scalarize vector load/stores", false, false) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 445dbcc879..4d8201df8d 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -3956,6 +3956,11 @@ struct ResLoadHelper { : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { opcode = LoadOpFromResKind(RK); + Type *Ty = Inst->getType(); + if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && + Ty->getVectorNumElements() > 1 && + Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; @@ -4025,6 +4030,14 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, if (RC == DxilResourceBase::Class::SRV) OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx : HLOperandIndex::kTexLoadOffsetOpIdx; + } else if (opcode == OP::OpCode::RawBufferLoad) { + // If native vectors are available and this load had a vector + // with more than one elements, convert the RawBufferLod to the + // native vector variant RawBufferVectorLoad. + Type *Ty = CI->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; } // Set offset. @@ -4082,7 +4095,7 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, // Sets up arguments for buffer load call. 
static SmallVector GetBufLoadArgs(ResLoadHelper helper, HLResource::Kind RK, - IRBuilder<> Builder, Type *EltTy, + IRBuilder<> Builder, unsigned LdSize) { OP::OpCode opcode = helper.opcode; llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); @@ -4130,6 +4143,7 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, // If not TextureLoad, it could be a typed or raw buffer load. // They have mostly similar arguments. DXASSERT(opcode == OP::OpCode::RawBufferLoad || + opcode == OP::OpCode::RawBufferVectorLoad || opcode == OP::OpCode::BufferLoad, "Wrong opcode in get load args"); Args.emplace_back( @@ -4140,6 +4154,9 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, // Unlike typed buffer load, raw buffer load has mask and alignment. Args.emplace_back(nullptr); // Mask will be added later %4. Args.emplace_back(alignmentVal); // alignment @5. + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // RawBufferVectorLoad takes just alignment, no mask. + Args.emplace_back(alignmentVal); // alignment @4 } } return Args; @@ -4165,18 +4182,21 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, if (isBool || (is64 && isTyped)) EltTy = Builder.getInt32Ty(); - // 64-bit types are stored as int32 pairs in typed buffers. + // Calculate load size with the scalar memory element type. + unsigned LdSize = DL.getTypeAllocSize(EltTy); + + // Adjust number of components as needed. if (is64 && isTyped) { + // 64-bit types are stored as int32 pairs in typed buffers. DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); NumComponents *= 2; + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // Native vector loads only have a single vector element in ResRet. + EltTy = VectorType::get(EltTy, NumComponents); + NumComponents = 1; } - unsigned LdSize = DL.getTypeAllocSize(EltTy); - - SmallVector Elts(NumComponents); - - SmallVector Args = - GetBufLoadArgs(helper, RK, Builder, EltTy, LdSize); + SmallVector Args = GetBufLoadArgs(helper, RK, Builder, LdSize); // Keep track of the first load for debug info migration. Value *FirstLd = nullptr; @@ -4188,9 +4208,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, else if (RK == DxilResource::Kind::StructuredBuffer) OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; - // Create calls to function object. + // Create call(s) to function object and collect results in Elts. // Typed buffer loads are limited to one load of up to 4 32-bit values. // Raw buffer loads might need multiple loads in chunks of 4. + SmallVector Elts(NumComponents); for (unsigned i = 0; i < NumComponents;) { // Load 4 elements or however many less than 4 are left to load. unsigned chunkSize = std::min(NumComponents - i, 4U); @@ -4200,7 +4221,7 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = GetRawBufferMaskForETy(EltTy, chunkSize, OP); // If we've loaded a chunk already, update offset to next chunk. - if (FirstLd != nullptr && opcode == OP::OpCode::RawBufferLoad) + if (FirstLd != nullptr) Args[OffsetIdx] = Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); } @@ -4209,8 +4230,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); // Extract elements from returned ResRet. - for (unsigned j = 0; j < chunkSize; j++, i++) - Elts[i] = Builder.CreateExtractValue(Ld, j); + // Native vector loads just have one vector element in the ResRet. 
+ // Others have up to four scalars that need to be individually extracted. + if (opcode == OP::OpCode::RawBufferVectorLoad) + Elts[i++] = Builder.CreateExtractValue(Ld, 0); + else + for (unsigned j = 0; j < chunkSize; j++, i++) + Elts[i] = Builder.CreateExtractValue(Ld, j); // Update status. UpdateStatus(Ld, helper.status, Builder, OP); @@ -4248,9 +4274,10 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, } } - // Package elements into a vector. + // Package elements into a vector as needed. Value *retValNew = nullptr; - if (!Ty->isVectorTy()) { + // Scalar or native vector loads need not construct vectors from elements. + if (!Ty->isVectorTy() || opcode == OP::OpCode::RawBufferVectorLoad) { retValNew = Elts[0]; } else { retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); @@ -4348,6 +4375,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, case DxilResource::Kind::StructuredBuffer: IsTyped = false; opcode = OP::OpCode::RawBufferStore; + // Where shader model and type allows, use vector store intrinsic. + if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + Ty->isVectorTy() && Ty->getVectorNumElements() > 1) + opcode = OP::OpCode::RawBufferVectorStore; break; case DxilResource::Kind::TypedBuffer: opcode = OP::OpCode::BufferStore; @@ -4390,7 +4421,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, EltTy = i32Ty; } - Function *F = OP->GetOpFunc(opcode, EltTy); llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); llvm::Value *undefI = @@ -4404,6 +4434,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, unsigned OffsetIdx = 0; if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::RawBufferVectorStore || opcode == OP::OpCode::BufferStore) { // Append Coord0 (Index) value. if (Idx->getType()->isVectorTy()) { @@ -4423,7 +4454,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, OffsetIdx = storeArgs.size() - 1; // Coord1 (Offset). - // Only relevant when storing more than 4 elements to structured buffers. storeArgs.emplace_back(offset); } else { // texture store @@ -4444,6 +4474,16 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, // TODO: support mip for texture ST } + // RawBufferVectorStore only takes a single value and alignment arguments. + if (opcode == DXIL::OpCode::RawBufferVectorStore) { + storeArgs.emplace_back(val); + storeArgs.emplace_back(Alignment); + Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); + Builder.CreateCall(F, storeArgs); + return; + } + Function *F = OP->GetOpFunc(opcode, EltTy); + constexpr unsigned MaxStoreElemCount = 4; const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements() : 1; const unsigned StoreInstCount = diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index f9e011f8d4..027d7d3cbc 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -15193,7 +15193,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } // Disallow long vecs from $Global cbuffers. - if (isGlobal && !isStatic && !isGroupShared) { + if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { // Suppress actual emitting of errors for incompletable types here // They are redundant to those produced in ActOnUninitializedDecl. 
struct SilentDiagnoser : public TypeDiagnoser { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl new file mode 100644 index 0000000000..5305ee495b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -0,0 +1,91 @@ +// RUN: %dxc -DTYPE=float -DNUM=4 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=4 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=2 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=2 -T vs_6_9 %s | FileCheck %s + +// RUN: %dxc -DTYPE=float -DNUM=6 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=13 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=24 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=32 -T vs_6_9 %s | FileCheck %s + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar/vector buffer types and indices. +/////////////////////////////////////////////////////////////////////// + +// CHECK: %dx.types.ResRet.[[VTY:v[0-9]*[a-z][0-9][0-9]]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]>, i32 } + +ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + +StructuredBuffer > RoStBuf : register(t2); +RWStructuredBuffer > RwStBuf : register(u2); + +ConsumeStructuredBuffer > CnStBuf : register(u4); +AppendStructuredBuffer > ApStBuf : register(u5); + +// CHECK-LABEL: define void @main +[shader("vertex")] +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt1 = RwByBuf.Load< vector >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt2 = RoByBuf.Load< vector >(ix[0]); + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: 
all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt4 = RoStBuf[ix[1]]; + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl new file mode 100644 index 0000000000..0a115bd709 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl @@ -0,0 +1,719 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -DNUM=17 
-enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DNUM=33 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DNUM=12 -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on an assortment vector sizes and types with 6.9 native vectors. +// Tests in a CS environment where vector operations were previously disallowed to confirm that they are retained. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK-DAG: %dx.types.ResRet.[[TY:v[0-9]*[a-z][0-9]*]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]> +// CHECK-DAG: %dx.types.ResRet.[[STY:[a-z][0-9]*]] = type { [[STYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> + +void assignments(inout vector things[11], TYPE scales[10]); +vector arithmetic(inout vector things[11])[11]; +vector scarithmetic(vector things[11], TYPE scales[10])[11]; +vector logic(vector truth[10], vector consequences[11])[10]; +vector index(vector things[11], int i)[11]; +void bittwiddlers(inout vector things[13]); + +struct Viface { + vector values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + vector values[10]; +}; + +struct Binface { + vector values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + // CHECK: [[scratch1:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[scratch2:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + // Assign scalar offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 2, i32 0, <13 x i32> + Offsets[2] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+void assignments(inout vector things[11], TYPE scales[10]) { + + // CHECK: [[VcIx:%.*]] = add i32 [[InIx1]], 1 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + things[0] = scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec1]] + things[1] += things[5]; + + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + things[2] -= things[6]; + + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec7]], [[vec3]] + things[3] *= things[7]; + + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + things[4] /= things[8]; + +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = (vector)things[9]; + vector f5 = (vector)things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + things[6] += scales[1]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + things[7] -= scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + things[8] *= scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + +} + +// Test arithmetic operators. +vector arithmetic(inout vector things[11])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // NOINT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> <[[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000),.*}}>, [[vec0]] + // INT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> zeroinitializer, [[vec0]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] = things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]], <[[TYPE]] [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]], <[[TYPE]] [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]], <[[TYPE]] [[NEG1]] + res[10] = --things[10]; + + // Things[] input gets all the result values since pre/post inc/decrements don't change the end result. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // res1 is just vec0 since it was just the unary + operator. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // res[] input gets either the original or the preincremented value. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[vec7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[vec8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
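+// When a TYPE scalar meets a vector<TYPE, NUM> operand, the scalar is splatted to the full
+// vector width before the binary op is emitted. A minimal sketch of the pattern the checks
+// below match, assuming a 7-lane float vector (names are illustrative, not captured values):
+//   %spt   = insertelement <7 x float> undef, float %scl, i32 0
+//   %splat = shufflevector <7 x float> %spt, <7 x float> undef, <7 x i32> zeroinitializer
+//   %res   = fadd fast <7 x float> %splat, %vec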
+vector scarithmetic(vector things[11], TYPE scales[10])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] 
@dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x 
[[TYPE]]> [[res3]], i32 [[ALN]])
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]])
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]])
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]])
+
+  return res;
+}
+
+// Test logic operators.
+// Only permissible in pre-HLSL2021.
+vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<TYPE, NUM> consequences[11])[10] {
+  vector<bool, NUM> res[10];
+  // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4
+  // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]]
+  // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i32 [[IALN]])
+  // CHECK: [[ivec0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i32 [[IALN]])
+  // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i32 [[IALN]])
+  // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i32 [[IALN]])
+  // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i32 [[IALN]])
+  // CHECK: [[ivec4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i32 [[IALN]])
+  // CHECK: [[ivec5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+
+  // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 4
+  // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]]
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]])
+  // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]])
+  // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]])
+  // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]])
+  // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0
+  // CHECK: [[ld:%.*]] = call
%dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec1]], zeroinitializer + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec4]], zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec5]], zeroinitializer + // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = truth[3] ? 
truth[4] : truth[5];
+
+  // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]]
+  // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32>
+  res[4] = consequences[0] == consequences[1];
+
+  // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]]
+  // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32>
+  res[5] = consequences[1] != consequences[2];
+
+  // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]]
+  // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32>
+  res[6] = consequences[2] < consequences[3];
+
+  // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]]
+  // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32>
+  res[7] = consequences[3] > consequences[4];
+
+  // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]]
+  // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32>
+  res[8] = consequences[4] <= consequences[5];
+
+  // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]]
+  // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32>
+  res[9] = consequences[5] >= consequences[6];
+
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], <[[NUM]] x i32> [[res0]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], <[[NUM]] x i32> [[res1]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], <[[NUM]] x i32> [[res2]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], <[[NUM]] x i32> [[res3]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], <[[NUM]] x i32> [[res4]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], <[[NUM]] x i32> [[res5]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], <[[NUM]] x i32> [[res6]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], <[[NUM]] x i32> [[res7]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], <[[NUM]] x i32> [[res8]], i32 4)
+  // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], <[[NUM]] x i32> [[res9]], i32 4)
+
+  return res;
+}
+
+static const int Ix = 2;
+
+// Test indexing operators.
+vector<TYPE, NUM> index(vector<TYPE, NUM> things[11], int i)[11] {
+  vector<TYPE, NUM> res[11];
+
+  // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5
+  // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]]
+  // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 5
+  // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]]
+
+  // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 0
+  // CHECK:
[[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec1]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec3]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec4]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec5]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec6]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[vec7]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec9]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec10]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + res[0] = 0; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 [[Ix]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[POS1]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 [[Ix]] + // CHECK: [[ldix:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[ldix]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[4] = things[i]; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, <[[NUM]] x 
[[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[ldix]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[vec2]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
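+// The `and` preceding each shl/lshr/ashr matched below is the shift-count mask dxc emits so
+// the amount stays below the element bit width. A scalar sketch of the same lowering, assuming
+// 32-bit int elements (the mask would be 63 for 64-bit and 15 for 16-bit element types):
+//   int masked  = amount & 31;
+//   int shifted = value << masked;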
+void bittwiddlers(inout vector things[13]) { + // INT: [[VcIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // INT: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // INT: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // INT: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // INT: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // INT: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // INT: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // INT: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // INT: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // INT: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], i32 [[ALN]]) + // INT: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], i32 [[ALN]]) + // INT: [[vec11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], i32 [[ALN]]) + // INT: [[vec12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]] + // INT: 
[[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + things[7] &= things[9]; + + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], <[[NUM]] x [[TYPE]]> [[vec11]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], <[[NUM]] x [[TYPE]]> [[vec12]], i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl new file mode 100644 index 0000000000..ca239a5b22 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl @@ -0,0 +1,680 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Scalar variants to confirm they match. +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on vec1s in a 6.9 compute shader to ensure they continue to be treated as scalars. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
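+// For instance, with -DTYPE=float16_t the module declares something like
+//   %dx.types.ResRet.f16 = type { half, half, half, half, i32 }
+// so the first CHECK-DAG below binds [[TY]] to the overload suffix ("f16") and [[TYPE]] to the
+// IR element spelling ("half"), letting later lines refer to either form. (Illustrative values;
+// the exact ResRet member list is whatever dxc emits for the chosen TYPE.)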
+// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:i32]] = type { i32 + +#ifdef SCL +#define VTYPE TYPE +#else +#define VTYPE vector +#endif + +void assignments(inout VTYPE things[11], TYPE scales[10]); +VTYPE arithmetic(inout VTYPE things[11])[11]; +VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11]; +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10]; +VTYPE index(VTYPE things[11], int i)[11]; +void bittwiddlers(inout VTYPE things[13]); + +struct Viface { + VTYPE values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + bool1 values[10]; +}; + +struct Binface { + VTYPE values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE)*11, + sizeof(TYPE)*12); + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(int)*0, + sizeof(int)*1, + sizeof(int)*2, + sizeof(int)*3, + sizeof(int)*4, + sizeof(int)*5, + sizeof(int)*6, + sizeof(int)*7, + sizeof(int)*8, + sizeof(int)*9, + sizeof(int)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. 
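+  // As a worked instance (assuming TYPE=float, so sizeof(TYPE) == 4), the first store above
+  // writes <13 x i32> <i32 0, i32 4, i32 8, ..., i32 48>, i.e. the byte offset of element k in
+  // a tightly packed TYPE array; these constants are presumably where the [[OFF*]]-style
+  // captures used by the load/store checks get their values.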
+ + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +void assignments(inout VTYPE things[11], TYPE scales[10]) { + + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 1 + + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue 
%dx.types.ResRet.[[TY]] [[ld]], 0 + // Nothing to check. Just a copy over. + things[0] = scales[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]]{{( nsw)?}} [[TYPE]] [[val5]], [[val1]] + things[1] += things[5]; + + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]]{{( nsw)?}} [[TYPE]] [[val2]], [[val6]] + things[2] -= things[6]; + + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]]{{( nsw)?}} [[TYPE]] [[val7]], [[val3]] + things[3] *= things[7]; + + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]]{{( nsw)?}} [[TYPE]] [[val4]], [[val8]] + things[4] /= things[8]; + +#ifdef DBL + things[5] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + + // CHECK: [[res6:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl1]], [[val6]] + things[6] += scales[1]; + + // CHECK: [[res7:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val7]], [[scl2]] + things[7] -= scales[2]; + + // CHECK: [[res8:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl3]], [[val8]] + things[8] *= scales[3]; + + // CHECK: [[res9:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val9]], [[scl4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[scl0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] 
undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[val10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + +} + +// Test arithmetic operators. +VTYPE arithmetic(inout VTYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + res[0] = +things[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle 
[[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[res1:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + res[1] = -things[0]; + + // CHECK: [[res2:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%.*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%.*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifdef DBL + res[6] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val9]], [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val10]], [[NEG1]] + res[10] = --things[10]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[val1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[val3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[val4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[val5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[val6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], 
[[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // Postincrement/decrements get the original value. + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[val7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[val8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
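+// scales[] supplies the TYPE operand: it appears on the right of +, -, *, and /
+// and on the left of +, -, and *, so both operand orders of the mixed
+// VTYPE/TYPE arithmetic are covered by the checks below.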
+VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle 
[[SclHdl]], i32 [[SclIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res0:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl0]], [[val0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val1]], [[scl1]] + res[1] = things[1] - scales[1]; + + // CHECK: [[res2:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl2]], [[val2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[res3:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val3]], [[scl3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[res4:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl4]], [[val4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[scl5]], [[val5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[res6:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl6]], [[val6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + + +// Test logic operators. 
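+// bool1 results are stored as i32, so each check below expects an i1 to be
+// computed (icmp/fcmp, and, or, or select) and then zext'ed back to i32
+// before it is written with rawBufferStore.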
+// Only permissable in pre-HLSL2021 +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10] { + bool1 res[10]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i8 1, i32 [[IALN]]) + // CHECK: [[ival0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i8 1, i32 [[IALN]]) + // CHECK: [[ival1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i8 1, i32 [[IALN]]) + // CHECK: [[ival2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i8 1, i32 [[IALN]]) + // CHECK: [[ival3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i8 1, i32 [[IALN]]) + // CHECK: [[ival4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i8 1, i32 [[IALN]]) + // CHECK: [[ival5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 
[[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[bres0:%.*]] = icmp eq i32 [[ival0]], 0 + // CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + res[0] = !truth[0]; + + // CHECK: [[res1:%.*]] = or i32 [[ival2]], [[ival1]] + // CHECK: [[bres1:%.*]] = icmp ne i32 [[res1]], 0 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[ival2]], 0 + // CHECK: [[bval3:%.*]] = icmp ne i32 [[ival3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[bval4:%.*]] = icmp ne i32 [[ival4]], 0 + // CHECK: [[bval5:%.*]] = icmp ne i32 [[ival5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], i32 [[res0]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], i32 [[res1]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], i32 [[res2]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], i32 [[res3]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], i32 [[res4]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], i32 [[res5]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], i32 [[res6]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], i32 [[res7]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], 
i32 [[ResIx]], i32 [[BOFF8]], i32 [[res8]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], i32 [[res9]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +VTYPE index(VTYPE things[11], int i)[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 5 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1:%.*]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val1]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val3]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val4]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val5]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 
[[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val6]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val9]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val10]], [[TYPE]]* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2:%.*]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]], align [[ALN]] + res[0] = 0; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 [[Ix]] + // CHECK: store [[TYPE]] [[POS1]], [[TYPE]]* [[adr]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 2 + // CHECK: store [[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]], [[TYPE]]* [[adr]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 [[Ix]] + // CHECK: [[vali:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[vali]], [[TYPE]]* [[adr]] + res[4] = things[i]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, 
i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[TWO]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[vali]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
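+// Integer element types only (guarded by INT). Shift amounts are masked first
+// (the `and` on the shift operand), and right shifts are checked as lshr under
+// UNSIG and ashr under SIG to distinguish unsigned from signed types.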
+void bittwiddlers(inout VTYPE things[13]) { + // INT: [[ValIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // INT: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // INT: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // INT: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // INT: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // INT: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // INT: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // INT: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // INT: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // INT: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // INT: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], i8 1, i32 [[ALN]]) + // INT: [[val11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], i8 1, i32 [[ALN]]) + // INT: [[val12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor [[TYPE]] [[val1]], -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or [[TYPE]] [[val3]], [[val2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and [[TYPE]] [[val4]], [[val3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor [[TYPE]] [[val5]], [[val4]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and [[TYPE]] [[val6]] + // INT: [[res4:%[0-9]*]] = shl [[TYPE]] [[val5]], [[shv6]] + things[4] 
= things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and [[TYPE]] [[val7]] + // UNSIG: [[res5:%[0-9]*]] = lshr [[TYPE]] [[val6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr [[TYPE]] [[val6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or [[TYPE]] [[val8]], [[val6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and [[TYPE]] [[val9]], [[val7]] + things[7] &= things[9]; + + // INT: [[res8:%[0-9]*]] = xor [[TYPE]] [[val10]], [[val8]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and [[TYPE]] [[val11]] + // INT: [[res9:%[0-9]*]] = shl [[TYPE]] [[val9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and [[TYPE]] [[val12]] + // UNSIG: [[res10:%[0-9]*]] = lshr [[TYPE]] [[val10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr [[TYPE]] [[val10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], [[TYPE]] [[val11]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], [[TYPE]] [[val12]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl index c366261406..44c9be17d4 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -1,51 +1,23 @@ -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL -// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t1 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG -// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. +// Test relevant operators on vec1s in 6.9 to ensure they continue to be treated as scalars. + +#define VTYPE vector // Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. // CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] // CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } -RWStructuredBuffer buf; - -export void assignments(inout TYPE things[10], TYPE scales[10]); -export TYPE arithmetic(inout TYPE things[11])[11]; -export bool logic(bool truth[10], TYPE consequences[10])[10]; -export TYPE index(TYPE things[10], int i, TYPE val)[10]; - -struct Interface { - TYPE assigned[10]; - TYPE arithmeticked[11]; - bool logicked[10]; - TYPE indexed[10]; - TYPE scales[10]; -}; - -#if 0 -// Requires vector loading support. Enable when available. -RWStructuredBuffer Input; -RWStructuredBuffer Output; - -TYPE g_val; - -[shader("compute")] -[numthreads(8,1,1)] -void main(uint GI : SV_GroupIndex) { - assignments(Output[GI].assigned, Input[GI].scales); - Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); - Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); - Output[GI].indexed = index(Input[GI].indexed, GI, g_val); -} -#endif +RWStructuredBuffer buf; // A mixed-type overload to test overload resolution and mingle different vector element types in ops // Test assignment operators. 
// CHECK-LABEL: define void @"\01?assignments -export void assignments(inout TYPE things[10]) { +export void assignments(inout VTYPE things[10]) { // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 @@ -111,8 +83,8 @@ export void assignments(inout TYPE things[10]) { // Test arithmetic operators. // CHECK-LABEL: define void @"\01?arithmetic -export TYPE arithmetic(inout TYPE things[11])[11] { - TYPE res[11]; +export VTYPE arithmetic(inout VTYPE things[11])[11] { + VTYPE res[11]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 @@ -226,7 +198,7 @@ export TYPE arithmetic(inout TYPE things[11])[11] { // Test logic operators. // Only permissable in pre-HLSL2021 // CHECK-LABEL: define void @"\01?logic -export bool logic(bool truth[10], TYPE consequences[10])[10] { +export bool logic(bool truth[10], VTYPE consequences[10])[10] { bool res[10]; // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] @@ -332,9 +304,9 @@ static const int Ix = 2; // Test indexing operators // CHECK-LABEL: define void @"\01?index -export TYPE index(TYPE things[10], int i)[10] { +export VTYPE index(VTYPE things[10], int i)[10] { // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] - TYPE res[10]; + VTYPE res[10]; // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] @@ -375,7 +347,7 @@ export TYPE index(TYPE things[10], int i)[10] { #ifdef INT // Test bit twiddling operators. // INT-LABEL: define void @"\01?bittwiddlers -export void bittwiddlers(inout TYPE things[13]) { +export void bittwiddlers(inout VTYPE things[13]) { // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl index ed7a2bff25..ba76eca619 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -48,24 +48,6 @@ struct Interface { TYPE scales[10]; }; -#if 0 -// Requires vector loading support. Enable when available. -RWStructuredBuffer Input; -RWStructuredBuffer Output; - -TYPE g_val; - -[shader("compute")] -[numthreads(8,1,1)] -void main(uint GI : SV_GroupIndex) { - assignments(Output[GI].assigned, Input[GI].scales); - Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); - Output[GI].scarithmeticked = scarithmetic(Input[GI].scarithmeticked, Input[GI].scales); - Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); - Output[GI].indexed = index(Input[GI].indexed, GI, g_val); -} -#endif - // A mixed-type overload to test overload resolution and mingle different vector element types in ops // Test assignment operators. 
// CHECK-LABEL: define void @"\01?assignments diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll new file mode 100644 index 0000000000..f9a9b3d677 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll @@ -0,0 +1,478 @@ +; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-scalarize-vector-load-stores -S | FileCheck %s + +; Verify that scalarize vector load stores pass will convert raw buffer vector operations +; into the equivalent collection of scalar load store calls. +; Sourced from buffer-load-stors-sm69.hlsl. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v17f32 = type { <17 x float>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%"class.StructuredBuffer >" = type { <17 x float> } +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <17 x float> } +%"class.ConsumeStructuredBuffer >" = type { <17 x float> } +%"class.AppendStructuredBuffer >" = type { <17 x float> } + +@"\01?RoByBuf@@3UByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwByBuf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 + +define void @main() { +bb: + %tmp = load %dx.types.Handle, %dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A", align 4 + %tmp2 = load %dx.types.Handle, %dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp3 = load %dx.types.Handle, %dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp4 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp5 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A", align 4 + %tmp6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %tmp7 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp5) + %tmp8 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: 
[[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp9 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i32 4) + %tmp10 = extractvalue %dx.types.ResRet.v17f32 %tmp9, 0 + %tmp11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp1) + %tmp12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 11, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] 
= extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp13 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i32 4) + %tmp14 = 
extractvalue %dx.types.ResRet.v17f32 %tmp13, 0 + %tmp15 = fadd fast <17 x float> %tmp14, %tmp10 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp15, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp15, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp15, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp15, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp15, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp15, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp15, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp15, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[ix2:%.*]] = add i32 %80, 16 + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp15, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp15, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp15, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp15, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[ix3:%.*]] = add i32 %85, 16 + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp15, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp15, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp15, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp15, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[ix4:%.*]] = add i32 %90, 16 + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp15, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, <17 x float> %tmp15, i32 4) + %tmp16 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp4) + %tmp17 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 4108, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 
[[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp18 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i32 4) + %tmp19 = extractvalue %dx.types.ResRet.v17f32 %tmp18, 0 + %tmp20 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 1, i8 0, i32 undef) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = 
extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp21 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i32 4) + %tmp22 = extractvalue %dx.types.ResRet.v17f32 %tmp21, 0 + %tmp23 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp) + %tmp24 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 12, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp25 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i32 4) + %tmp26 = extractvalue %dx.types.ResRet.v17f32 %tmp25, 0 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp27 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i32 4) + %tmp28 = extractvalue %dx.types.ResRet.v17f32 %tmp27, 0 + %tmp29 = fadd fast <17 x float> %tmp22, %tmp19 + %tmp30 = fadd fast <17 x float> %tmp29, %tmp26 + %tmp31 = fadd fast <17 x float> %tmp30, %tmp28 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp31, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp31, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp31, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp31, 
i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp31, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp31, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp31, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp31, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp31, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp31, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp31, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp31, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp31, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp31, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp31, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp31, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp31, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, <17 x float> %tmp31, i32 4) + %tmp32 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp3) + %tmp33 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp32, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp34 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp33, i8 -1) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp35 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i32 4) + %tmp36 = extractvalue %dx.types.ResRet.v17f32 %tmp35, 0 + %tmp37 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp2) + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp37, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp39 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp38, i8 1) + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> [[vec16]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> [[vec16]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> [[vec16]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> [[vec16]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> [[vec16]], i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> [[vec16]], i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> [[vec16]], i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> [[vec16]], i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: 
[[val8:%.*]] = extractelement <17 x float> [[vec16]], i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> [[vec16]], i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> [[vec16]], i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> [[vec16]], i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> [[vec16]], i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> [[vec16]], i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> [[vec16]], i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> [[vec16]], i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> [[vec16]], i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, <17 x float> %tmp36, i32 4) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0 +declare %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare void @dx.op.rawBufferVectorStore.v17f32(i32, %dx.types.Handle, i32, i32, <17 x float>, i32) #2 +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!13} +!dx.entryPoints = !{!17, !19} + +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{!4, !8, null, null} +!4 = !{!5, !6} +!5 = !{i32 0, %struct.ByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A" to %struct.ByteAddressBuffer*), !"RoByBuf", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{i32 1, %"class.StructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.StructuredBuffer >"*), !"RoStBuf", i32 0, i32 2, i32 1, i32 12, i32 0, !7} +!7 = !{i32 1, i32 68} +!8 = !{!9, !10, !11, !12} +!9 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"RwByBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!10 = !{i32 1, %"class.RWStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.RWStructuredBuffer >"*), !"RwStBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7} +!11 = !{i32 2, %"class.ConsumeStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.ConsumeStructuredBuffer >"*), !"CnStBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!12 = !{i32 3, %"class.AppendStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" 
to %"class.AppendStructuredBuffer >"*), !"ApStBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!13 = !{i32 1, void ()* @main, !14} +!14 = !{!15} +!15 = !{i32 0, !16, !16} +!16 = !{} +!17 = !{null, !"", null, !3, !18} +!18 = !{i32 0, i64 8589934608} +!19 = !{void ()* @main, !"main", !20, null, !24} +!20 = !{!21, null, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 5, i8 0, !23, i8 0, i32 2, i8 1, i32 0, i8 0, null} +!23 = !{i32 0, i32 1} +!24 = !{i32 8, i32 1, i32 5, !25} +!25 = !{i32 0} diff --git a/tools/clang/test/DXILValidation/load-store-validation.hlsl b/tools/clang/test/DXILValidation/load-store-validation.hlsl new file mode 100644 index 0000000000..d4e5e29db8 --- /dev/null +++ b/tools/clang/test/DXILValidation/load-store-validation.hlsl @@ -0,0 +1,74 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of various invalid load/store parameters. +// It is used to generate LitDXILValidation/load-store-validation.ll using `dxc -T ps_6_9`. +// Output is modified to trigger various validation errors. + +Texture1D<float4> Tex; +RWTexture1D<float4> RwTex; +SamplerState Samp; + +StructuredBuffer<float4> VecBuf; +StructuredBuffer<float> ScalBuf; +ByteAddressBuffer BaBuf; + +RWStructuredBuffer<float4> OutVecBuf; +RWStructuredBuffer<float> OutScalBuf; +RWByteAddressBuffer OutBaBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(int i : IX) : SV_Target { + // Texture provides some invalid handles to plug in. + float4 TexVal = Tex.Sample(Samp, i); + RwTex[0] = TexVal; + + // For invalid RC on Load (and inevitably invalid RK). + float BadRCLd = ScalBuf[0]; + // For invalid RK on Load. + float BadRKLd = ScalBuf[1]; + // For non-constant alignment on Load. + float BadAlnLd = ScalBuf[2]; + // For undefined offset on Structured Buffer Load. + float BadStrOffLd = ScalBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Load. + float BadBabOffLd = BaBuf.Load<float>(0); + + // For invalid RC on Vector Load (and inevitably invalid RK). + float4 BadRCVcLd = VecBuf[0]; + // For invalid RK on Vector Load. + float4 BadRKVcLd = VecBuf[1]; + // For non-constant alignment on Vector Load. + float4 BadAlnVcLd = VecBuf[2]; + // For undefined offset on Structured Buffer Vector Load. + float4 BadStrOffVcLd = VecBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + float4 BadBabOffVcLd = BaBuf.Load<float4>(4); + + // For Store to non-UAV. + OutScalBuf[0] = BadRCLd; + // For invalid RK on Store. + OutScalBuf[1] = BadRKLd; + // For non-constant alignment on Store. + OutScalBuf[2] = BadAlnLd; + // For undefined offset on Structured Buffer Store. + OutScalBuf[3] = BadStrOffLd; + // For undefined value Store. + OutScalBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Store. + OutBaBuf.Store(0, BadBabOffLd); + + // For Vector Store to non-UAV. + OutVecBuf[0] = BadRCVcLd; + // For invalid RK on Vector Store. + OutVecBuf[1] = BadRKVcLd; + // For non-constant alignment on Vector Store. + OutVecBuf[2] = BadAlnVcLd; + // For undefined offset on Structured Buffer Vector Store. + OutVecBuf[3] = BadStrOffVcLd; + // For undefined value Vector Store. + OutVecBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Store. 
+ OutBaBuf.Store(4, BadBabOffVcLd); + + return TexVal; +} + diff --git a/tools/clang/test/DXILValidation/vector-validation.hlsl b/tools/clang/test/DXILValidation/vector-validation.hlsl new file mode 100644 index 0000000000..5d6a5cd4a2 --- /dev/null +++ b/tools/clang/test/DXILValidation/vector-validation.hlsl @@ -0,0 +1,14 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of disallowed 6.9 features in previous shader models. +// It is used to generate LitDXILValidation/vector-validation.ll using `dxc -T vs_6_9`. +// Output is modified to have shader model 6.8 instead. + +RWStructuredBuffer<float4> VecBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(float val : VAL) : SV_Position { + float4 vec = VecBuf[1]; + VecBuf[0] = val; + return vec[2]; +} + diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll new file mode 100644 index 0000000000..34b2f6b602 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -0,0 +1,229 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Ensure proper validation errors are produced for invalid parameters to load and store operations. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.Texture1D >" = type { <4 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.StructuredBuffer >" = type { <4 x float> } +%"class.StructuredBuffer" = type { float } +%struct.ByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%struct.SamplerState = type { i32 } + +; Unfortunately, the validation errors come in weird orders. +; Inlining them isn't helpful, so we'll just dump them all here. +; Inline comments, variable names, and notes should help find the corresponding source. + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. 
+; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at '%badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at '%badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' + +define void @main() { +bb: + %tmp = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + %tmp1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + %tmp2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %tmp3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + %tmp4 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + %tmp5 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + %tmp6 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) + %tmp7 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 3 }, i32 0, i1 false) + %tmp8 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 0, i1 false) + %ix = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %texIx = sitofp i32 %ix to float + %tex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 1, i32 1033 }) + %samp = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 14, i32 0 }) + %tmp10 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, %dx.types.Handle %tex, %dx.types.Handle %samp, float %texIx, float undef, float undef, float undef, i32 0, i32 undef, i32 undef, float undef) + %tmp11 = extractvalue %dx.types.ResRet.f32 %tmp10, 0 + %tmp12 = extractvalue %dx.types.ResRet.f32 %tmp10, 1 + %tmp13 = extractvalue %dx.types.ResRet.f32 %tmp10, 2 + %tmp14 = extractvalue %dx.types.ResRet.f32 %tmp10, 3 + %rwTex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp8, %dx.types.ResourceProperties { i32 4097, i32 1033 }) + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %rwTex, i32 0, i32 undef, i32 undef, float %tmp11, float %tmp12, float %tmp13, float %tmp14, i8 15) + %scalBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp4, %dx.types.ResourceProperties { i32 12, i32 4 }) + ; Invalid RC on Load (and inevitably invalid RK). 
+ %badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4) + %badRC = extractvalue %dx.types.ResRet.f32 %badRCLd, 0 + ; Invalid RK on Load. + %badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4) + %badRK = extractvalue %dx.types.ResRet.f32 %badRKLd, 0 + ; Non-constant alignment on Load. + %badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix) + %badAln = extractvalue %dx.types.ResRet.f32 %badAlnLd, 0 + ; Undefined offset on Structured Buffer Load. + %badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4) + %badStrOff = extractvalue %dx.types.ResRet.f32 %badStrOffLd, 0 + %baBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp3, %dx.types.ResourceProperties { i32 11, i32 0 }) + ; Defined (and therefore invalid) offset on Byte Address Buffer Load. + %badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4) + %badBabOff = extractvalue %dx.types.ResRet.f32 %badBabOffLd, 0 + + %vecBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp5, %dx.types.ResourceProperties { i32 12, i32 16 }) + ; Invalid RC on Vector Load (and inevitably invalid RK). + %badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4) + %badRCVc = extractvalue %dx.types.ResRet.v4f32 %badRCVcLd, 0 + ; Invalid RK on Vector Load. + %badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4) + %badRKVc = extractvalue %dx.types.ResRet.v4f32 %badRKVcLd, 0 + ; Non-constant alignment on Vector Load. + %badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix) + %badAlnVc = extractvalue %dx.types.ResRet.v4f32 %badAlnVcLd, 0 + ; Undefined offset on Structured Buffer Vector Load. + %badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4) + %badStrOffVc = extractvalue %dx.types.ResRet.v4f32 %badStrOffVcLd, 0 + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + %badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4) + %badBabOffVc = extractvalue %dx.types.ResRet.v4f32 %badBabOffVcLd, 0 + + ; Store to non-UAV. + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4) + ; Invalid RK on Store. + %tmp39 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4) + ; Non-constant alignment on Store. 
+ %tmp40 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix) + ; Undefined offset on Structured Buffer Store. + %tmp41 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) + ; Undefined value Store. + %tmp42 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Store. + %tmp44 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4) + + ; Vector Store to non-UAV. + %tmp45 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %rwTex, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4) + ; Invalid RK on Vector Store. + %tmp46 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4) + ; Non-constant alignment on Vector Store. + %tmp47 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix) + ; Undefined offset on Structured Buffer Vector Store. + %tmp48 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4) + ; Undefined value Vector Store. + %tmp49 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Store. 
+ %tmp51 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4) + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %tmp11) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %tmp12) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %tmp13) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %tmp14) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #2 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 +declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #1 +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #0 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!18} +!dx.entryPoints = !{!19} + +!1 = !{i32 1, i32 9} +!2 = !{!"ps", i32 6, i32 9} +!3 = !{!4, !12, null, !16} +!4 = !{!5, !7, !9, !11} +!5 = !{i32 0, %"class.Texture1D >"* undef, !"", i32 0, i32 0, i32 1, i32 1, i32 0, !6} +!6 = !{i32 0, i32 9} +!7 = !{i32 1, %"class.StructuredBuffer >"* undef, !"", i32 0, i32 1, i32 1, i32 12, i32 0, !8} +!8 = !{i32 1, i32 16} +!9 = !{i32 2, %"class.StructuredBuffer"* undef, !"", i32 0, i32 2, i32 1, i32 12, i32 0, !10} +!10 = !{i32 1, i32 4} +!11 = !{i32 3, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 3, i32 1, i32 11, i32 0, null} +!12 = !{!13, !14, !15} +!13 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !8} +!14 = !{i32 1, %"class.RWStructuredBuffer"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !10} +!15 = !{i32 2, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 2, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{!17} +!17 = !{i32 0, %struct.SamplerState* undef, !"", i32 0, i32 0, i32 1, i32 0, null} +!18 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!19 = !{void ()* @main, !"main", !20, !3, !27} +!20 = !{!21, !24, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 4, i8 0, !23, i8 1, i32 1, i8 1, i32 0, i8 0, null} +!23 = !{i32 0} +!24 = !{!25} +!25 = !{i32 0, !"SV_Target", i8 9, i8 16, !23, i8 0, i32 1, i8 4, i32 0, i8 0, !26} +!26 = !{i32 3, i32 15} +!27 = !{i32 0, i64 8589934608} diff --git a/tools/clang/test/LitDXILValidation/vector-validation.ll b/tools/clang/test/LitDXILValidation/vector-validation.ll new file mode 100644 index 0000000000..74e8116e88 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/vector-validation.ll @@ -0,0 +1,78 @@ +; RUN: not %dxv %s 2>&1 | 
FileCheck %s + +; Confirm that 6.9 specific LLVM operations and DXIL intrinsics fail in 6.8 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } + +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%6 = insertelement <4 x float> undef, float %2, i32 0 +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%8 = extractelement <4 x float> %5, i32 2 +; CHECK: Function: main: error: Opcode RawBufferVectorLoad not valid in shader model vs_6_8. +; CHECK: note: at '%4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8)' +; CHECK: Function: main: error: Opcode RawBufferVectorStore not valid in shader model vs_6_8. +; CHECK: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4)' +; CHECK: Function: main: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: main: error: Function uses features incompatible with the shader model. +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4108, i32 16 }) + %4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8) + %5 = extractvalue %dx.types.ResRet.v4f32 %4, 0 + %6 = insertelement <4 x float> undef, float %2, i32 0 + %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4) + %8 = extractelement <4 x float> %5, i32 2 + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %8) + ret void +} + +declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #2 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind readonly } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!7} 
+!dx.entryPoints = !{!8} + +!1 = !{i32 1, i32 8} +!2 = !{!"vs", i32 6, i32 8} +!3 = !{null, !4, null, null} +!4 = !{!5} +!5 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6} +!6 = !{i32 1, i32 16} +!7 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!8 = !{void ()* @main, !"main", !9, !3, !17} +!9 = !{!10, !14, null} +!10 = !{!11} +!11 = !{i32 0, !"VAL", i8 9, i8 0, !12, i8 0, i32 1, i8 1, i32 0, i8 0, !13} +!12 = !{i32 0} +!13 = !{i32 3, i32 1} +!14 = !{!15} +!15 = !{i32 0, !"SV_Position", i8 9, i8 3, !12, i8 4, i32 1, i8 4, i32 0, i8 0, !16} +!16 = !{i32 3, i32 15} +!17 = !{i32 0, i64 8590000144} + diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl index 42eb6b077c..54c85191da 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl @@ -3,6 +3,8 @@ #define TYPE float #define NUM 5 +StructuredBuffer<vector<TYPE, NUM> > sbuf; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + struct LongVec { float4 f; vector<TYPE, NUM> vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index f69b0be204..01f24e0227 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1506,21 +1506,23 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) { } TEST_F(ValidationTest, StructBufLoadCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 8)", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 undef)", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 8)", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 undef)", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, StructBufStoreCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 undef", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 undef", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, TypedBufRetType) { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 5eb35fb52a..691c3ba58f 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -479,7 +479,7 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Dot" for ( i - ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore".split( + ) in 
"CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore,RawBufferVectorLoad,RawBufferVectorStore".split( "," ): self.name_idx[i].category = "Resources" @@ -606,6 +606,8 @@ def populate_categories_and_models(self): for i in "RawBufferLoad,RawBufferStore".split(","): self.name_idx[i].shader_model = 6, 2 self.name_idx[i].shader_model_translated = 6, 0 + for i in "RawBufferVectorLoad,RawBufferVectorStore".split(","): + self.name_idx[i].shader_model = 6, 9 for i in "DispatchRaysIndex,DispatchRaysDimensions".split(","): self.name_idx[i].category = "Ray Dispatch Arguments" self.name_idx[i].shader_model = 6, 3 @@ -5778,6 +5780,84 @@ def UFI(name, **mappings): # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + # Long Vectors + self.add_dxil_op( + "RawBufferVectorLoad", + next_op_idx, + "RawBufferVectorLoad", + "reads from a raw buffer and structured buffer", + "hfwidl<", + "ro", + [ + db_dxil_param(0, "$r", "", "the loaded value"), + db_dxil_param(2, "res", "buf", "handle of Raw Buffer to load from"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param( + 5, + "i32", + "alignment", + "relative load access alignment", + is_const=True, + ), + ], + counters=("tex_load",), + ) + next_op_idx += 1 + + self.add_dxil_op( + "RawBufferVectorStore", + next_op_idx, + "RawBufferVectorStore", + "writes to a RWByteAddressBuffer or RWStructuredBuffer", + "hfwidl<", + "", + [ + db_dxil_param(0, "v", "", ""), + db_dxil_param(2, "res", "uav", "handle of UAV to store to"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param(5, "$o", "value0", "value"), + db_dxil_param( + 6, + "i32", + "alignment", + "relative store access alignment", + is_const=True, + ), + ], + counters=("tex_store",), + ) + next_op_idx += 1 + + # End of DXIL 1.9 opcodes. + # NOTE!! Update and uncomment when DXIL 1.9 opcodes are finalized: + # self.set_op_count_for_version(1, 9, next_op_idx) + # assert next_op_idx == NNN, ( + # "NNN is expected next operation index but encountered %d and thus opcodes are broken" + # % next_op_idx + # ) + # Set interesting properties. 
self.build_indices() for ( @@ -6385,6 +6465,12 @@ def add_pass(name, type_name, doc, opts): "DXIL Lower createHandleForLib", [], ) + add_pass( + "hlsl-dxil-scalarize-vector-load-stores", + "DxilScalarizeVectorLoadStores", + "DXIL scalarize vector load/stores", + [], + ) add_pass( "hlsl-dxil-cleanup-dynamic-resource-handle", "DxilCleanupDynamicResourceHandle", @@ -7607,11 +7693,15 @@ def build_valrules(self): ) self.add_valrule( "Instr.CoordinateCountForRawTypedBuf", - "raw/typed buffer don't need 2 coordinates.", + "raw/typed buffer offset must be undef.", + ) + self.add_valrule( + "Instr.ConstAlignForRawBuf", + "Raw Buffer alignment value must be a constant.", ) self.add_valrule( "Instr.CoordinateCountForStructBuf", - "structured buffer require 2 coordinates.", + "structured buffer requires defined index and offset coordinates.", ) self.add_valrule( "Instr.MipLevelForGetDimension", From 9e9184426c9103a96ec8da2fe4da290f467d4486 Mon Sep 17 00:00:00 2001 From: Chris B Date: Mon, 7 Apr 2025 14:22:34 -0500 Subject: [PATCH 72/88] [NFC] containsLongVector -> ContainsLongVector (#7255) I provided feedback during code review that this function should be named following LLVM conventions. That feedback did not account for the fact that SemaHLSL is otherwise consistent using CamelCase instead of camelCase naming. This corrects my error by renaming to match the consistent style in SemaHLSL.h. I've also updated the parameter naming in the source file to conform to LLVM style since I was in the area anyways. --- tools/clang/include/clang/Sema/SemaHLSL.h | 2 +- tools/clang/lib/Sema/SemaDXR.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 24 ++++++++++----------- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index d6103b55e6..ac6e08b3fa 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,7 +128,7 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); -bool containsLongVector(clang::QualType qt); +bool ContainsLongVector(clang::QualType); bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 0f27de8291..36ab55ea10 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,7 +810,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (containsLongVector(Payload->getType())) { + if (ContainsLongVector(Payload->getType())) { const unsigned PayloadParametersIdx = 10; S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) << PayloadParametersIdx; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 027d7d3cbc..6796badcb6 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5529,7 +5529,7 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (containsLongVector(argType)) { + if (ContainsLongVector(argType)) { const unsigned ConstantBuffersOrTextureBuffersIdx = 0; m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) << ConstantBuffersOrTextureBuffersIdx; @@ -5637,7 +5637,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = 
arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned TessellationPatchesIDx = 1; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -5656,7 +5656,7 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (containsLongVector(arg.getAsType())) { + if (ContainsLongVector(arg.getAsType())) { const unsigned GeometryStreamsIdx = 2; m_sema->Diag(argLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -12545,14 +12545,14 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::containsLongVector(QualType qt) { - if (qt.isNull() || qt->isDependentType()) +bool hlsl::ContainsLongVector(QualType QT) { + if (QT.isNull() || QT->isDependentType()) return false; - while (const ArrayType *Arr = qt->getAsArrayTypeUnsafe()) - qt = Arr->getElementType(); + while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) + QT = Arr->getElementType(); - if (CXXRecordDecl *Decl = qt->getAsCXXRecordDecl()) { + if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { if (!Decl->isCompleteDefinition()) return false; return Decl->hasHLSLLongVector(); @@ -15201,7 +15201,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (containsLongVector(qt)) { + if (ContainsLongVector(qt)) { unsigned CbuffersOrTbuffersIdx = 4; Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) << CbuffersOrTbuffersIdx; @@ -16099,7 +16099,7 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (containsLongVector(Arg->getType())) { + if (ContainsLongVector(Arg->getType())) { const unsigned UserDefinedStructParameterIdx = 5; S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) << UserDefinedStructParameterIdx; @@ -16842,14 +16842,14 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. 
for (const auto *param : FD->params()) { - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned EntryFunctionParametersIdx = 6; S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionParametersIdx; } } - if (containsLongVector(FD->getReturnType())) { + if (ContainsLongVector(FD->getReturnType())) { const unsigned EntryFunctionReturnIdx = 7; S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) << EntryFunctionReturnIdx; diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index c562ee8d52..abca7cbf86 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -710,14 +710,14 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } for (const auto *param : pPatchFnDecl->params()) - if (containsLongVector(param->getType())) { + if (ContainsLongVector(param->getType())) { const unsigned PatchConstantFunctionParametersIdx = 8; self->Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) << PatchConstantFunctionParametersIdx; } - if (containsLongVector(pPatchFnDecl->getReturnType())) { + if (ContainsLongVector(pPatchFnDecl->getReturnType())) { const unsigned PatchConstantFunctionReturnIdx = 9; self->Diag(pPatchFnDecl->getLocation(), diag::err_hlsl_unsupported_long_vector) From dc4a2b6e910f47ef51cc482c648f105e866f58f7 Mon Sep 17 00:00:00 2001 From: nopandbrk <202358470+nopandbrk@users.noreply.github.com> Date: Mon, 7 Apr 2025 15:13:08 -0700 Subject: [PATCH 73/88] [PIX] Add a pass for PIX to log missing NonUniformResourceIndex usage into a UAV (#7272) This is a pass to add instructions to determine missing usage of the NonUniformResourceIndex qualifier when dynamically indexing resources. The instruction numbers will be written out to a UAV for later ingestion by PIX to present a view of the output. 
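For illustration, a minimal HLSL sketch of the pattern this pass flags (the
names and semantics here are assumed for the example; it is not excerpted from
the diff below):

  // Dynamic resource index with no NonUniformResourceIndex wrapper: the pass
  // inserts a WaveActiveAllEqual check on 'i' and atomically ORs the outcome
  // into a PIX-owned UAV, keyed by the instruction number of the handle.
  Texture2D texs[] : register(t0);
  float4 PSMain(float2 uv : TEXCOORD0,
                nointerpolation uint i : INDEX0) : SV_Target {
    return texs[i].Load(int3(0, 0, 0));
    // texs[NonUniformResourceIndex(i)] would not be instrumented.
  }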
--- include/dxc/DxilPIXPasses/DxilPIXPasses.h | 3 + lib/DxilPIXPasses/CMakeLists.txt | 1 + ...NonUniformResourceIndexInstrumentation.cpp | 173 ++++++++++++++ .../DxilShaderAccessTracking.cpp | 89 +------ lib/DxilPIXPasses/PixPassHelpers.cpp | 84 +++++++ lib/DxilPIXPasses/PixPassHelpers.h | 7 +- tools/clang/unittests/HLSL/PixTest.cpp | 219 ++++++++++++++++++ utils/hct/hctdb.py | 6 + 8 files changed, 499 insertions(+), 83 deletions(-) create mode 100644 lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h index ad0ddfdfd2..5cc7c4aa50 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h +++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h @@ -27,6 +27,7 @@ ModulePass *createDxilDebugInstrumentationPass(); ModulePass *createDxilShaderAccessTrackingPass(); ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass(); ModulePass *createDxilPIXDXRInvocationsLogPass(); +ModulePass *createDxilNonUniformResourceIndexInstrumentationPass(); void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry &); void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry &); @@ -41,5 +42,7 @@ void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry &); void initializeDxilPIXAddTidToAmplificationShaderPayloadPass( llvm::PassRegistry &); void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry &); +void initializeDxilNonUniformResourceIndexInstrumentationPass( + llvm::PassRegistry &); } // namespace llvm diff --git a/lib/DxilPIXPasses/CMakeLists.txt b/lib/DxilPIXPasses/CMakeLists.txt index c36d11d559..67e77f17cd 100644 --- a/lib/DxilPIXPasses/CMakeLists.txt +++ b/lib/DxilPIXPasses/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMDxilPIXPasses PixPassHelpers.cpp DxilPIXAddTidToAmplificationShaderPayload.cpp DxilPIXDXRInvocationsLog.cpp + DxilNonUniformResourceIndexInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp new file mode 100644 index 0000000000..a442bfabed --- /dev/null +++ b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp @@ -0,0 +1,173 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNonUniformResourceIndexInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to add instrumentation to determine missing usage of the // +// NonUniformResourceIndex qualifier when dynamically indexing resources. // +// Used by PIX. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "PixPassHelpers.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DxilPIXPasses/DxilPIXPasses.h" +#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace hlsl; + +class DxilNonUniformResourceIndexInstrumentation : public ModulePass { + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilNonUniformResourceIndexInstrumentation() : ModulePass(ID) {} + StringRef getPassName() const override { + return "DXIL NonUniformResourceIndex Instrumentation"; + } + bool runOnModule(Module &M) override; +}; + +bool DxilNonUniformResourceIndexInstrumentation::runOnModule(Module &M) { + // This pass adds instrumentation for incorrect NonUniformResourceIndex usage + + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext &Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + hlsl::DxilResource *PixUAVResource = nullptr; + + UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + + // Use WaveActiveAllEqual to check if a dynamic index is uniform + Function *WaveActiveAllEqualFunc = HlslOP->GetOpFunc( + DXIL::OpCode::WaveActiveAllEqual, Type::getInt32Ty(Ctx)); + Constant *WaveActiveAllEqualOpCode = + HlslOP->GetI32Const((int32_t)DXIL::OpCode::WaveActiveAllEqual); + + // Atomic operation to use for writing to the result uav resource + Function *AtomicOpFunc = + HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant *AtomicBinOpcode = + HlslOP->GetU32Const((uint32_t)OP::OpCode::AtomicBinOp); + Constant *AtomicOr = HlslOP->GetU32Const((uint32_t)DXIL::AtomicBinOpCode::Or); + + std::map FunctionToUAVHandle; + + // This is the main pass that will iterate through all of the resources that + // are dynamically indexed. If not already marked NonUniformResourceIndex, + // then insert WaveActiveAllEqual to determine if the index is uniform + // and finally write to a UAV resource with the result. + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&](bool IsNonUniformIndex, Instruction *CreateHandle, + Value *IndexOperand) { + if (IsNonUniformIndex) { + // The NonUniformResourceIndex qualifier was used, continue. + return true; + } + + if (!PixUAVResource) { + PixUAVResource = + PIXPassHelpers::CreateGlobalUAVResource(DM, 0, "PixUAVResource"); + } + + CallInst *PixUAVHandle = nullptr; + Function *F = CreateHandle->getParent()->getParent(); + + const auto FunctionToUAVHandleIter = FunctionToUAVHandle.lower_bound(F); + + if ((FunctionToUAVHandleIter != FunctionToUAVHandle.end()) && + (FunctionToUAVHandleIter->first == F)) { + PixUAVHandle = FunctionToUAVHandleIter->second; + } else { + IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt()); + + PixUAVHandle = PIXPassHelpers::CreateHandleForResource( + DM, Builder, PixUAVResource, "PixUAVHandle"); + + FunctionToUAVHandle.insert(FunctionToUAVHandleIter, + {F, PixUAVHandle}); + } + + IRBuilder<> Builder(CreateHandle); + + uint32_t InstructionNumber = 0; + if (!pix_dxil::PixDxilInstNum::FromInst(CreateHandle, + &InstructionNumber)) { + DXASSERT_NOMSG(false); + } + + // The output UAV is treated as a bit array where each bit corresponds + // to an instruction number. This determines what byte offset to write + // our result to based on the instruction number. 
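+ // Worked example (illustrative): for a hypothetical instruction number 37,
+ // the byte offset is (37 / 32) * 4 == 4 and the bit position is 37 % 32 == 5,
+ // so the mask computed below is 1u << 5 == 0x20.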
+ const uint32_t InstructionNumByteOffset = + (InstructionNumber / 32u) * sizeof(uint32_t); + const uint32_t InstructionNumBitPosition = (InstructionNumber % 32u); + const uint32_t InstructionNumBitMask = 1u << InstructionNumBitPosition; + + Constant *UAVByteOffsetArg = + HlslOP->GetU32Const(InstructionNumByteOffset); + + CallInst *WaveActiveAllEqualCall = Builder.CreateCall( + WaveActiveAllEqualFunc, {WaveActiveAllEqualOpCode, IndexOperand}); + + // This takes the result of the WaveActiveAllEqual result and shifts + // it into the same bit position as the instruction number, followed + // by an xor to determine what to write to the UAV + Value *IsWaveEqual = + Builder.CreateZExt(WaveActiveAllEqualCall, Builder.getInt32Ty()); + Value *WaveEqualBitMask = + Builder.CreateShl(IsWaveEqual, InstructionNumBitPosition); + Value *FinalResult = + Builder.CreateXor(WaveEqualBitMask, InstructionNumBitMask); + + // Generate instructions to bitwise OR a UAV value corresponding + // to the instruction number and result of WaveActiveAllEqual. + // If WaveActiveAllEqual was false, we write a 1, otherwise a 0. + Builder.CreateCall( + AtomicOpFunc, + { + AtomicBinOpcode, // i32, ; opcode + PixUAVHandle, // %dx.types.Handle, ; resource handle + AtomicOr, // i32, ; binary operation code : + // EXCHANGE, IADD, AND, OR, XOR + // IMIN, IMAX, UMIN, UMAX + UAVByteOffsetArg, // i32, ; coordinate c0: byte offset + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + FinalResult // i32); value + }, + "UAVInstructionNumberBitSet"); + return true; + }); + + const bool modified = (PixUAVResource != nullptr); + + if (modified) { + DM.ReEmitDxilResources(); + + if (OSOverride != nullptr) { + formatted_raw_ostream FOS(*OSOverride); + FOS << "\nFoundDynamicIndexingNoNuri\n"; + } + } + + return modified; +} + +char DxilNonUniformResourceIndexInstrumentation::ID = 0; + +ModulePass *llvm::createDxilNonUniformResourceIndexInstrumentationPass() { + return new DxilNonUniformResourceIndexInstrumentation(); +} + +INITIALIZE_PASS(DxilNonUniformResourceIndexInstrumentation, + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + false, false) diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 4f4cc7c620..bd96d83965 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -795,87 +795,6 @@ DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle, return ret; } -static bool CheckForDynamicIndexing(OP *HlslOP, LLVMContext &Ctx, - DxilModule &DM) { - bool FoundDynamicIndexing = false; - - for (llvm::Function &F : DM.GetModule()->functions()) { - if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { - if (F.hasName()) { - if (F.getName().find("createHandleForLib") != StringRef::npos) { - auto FunctionUses = F.uses(); - for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { - auto &FunctionUse = *FI++; - auto FunctionUser = FunctionUse.getUser(); - auto instruction = cast(FunctionUser); - Value *resourceLoad = - instruction->getOperand(kCreateHandleForLibResOpIdx); - if (auto *load = cast(resourceLoad)) { - auto *resOrGep = load->getOperand(0); - if (isa(resOrGep)) { - FoundDynamicIndexing = true; - break; - } - } - } - } - } - } - if (FoundDynamicIndexing) { - break; - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFn = - HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, 
Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFn->user_begin(); - FI != CreateHandleFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = instruction->getOperand(kCreateHandleResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromBindingFn->user_begin(); - FI != CreateHandleFromBindingFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromBindingResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromHeapFn->user_begin(); - FI != CreateHandleFromHeapFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromHeapHeapIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - return FoundDynamicIndexing; -} - bool DxilShaderAccessTracking::runOnModule(Module &M) { // This pass adds instrumentation for shader access to resources @@ -887,7 +806,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { if (m_CheckForDynamicIndexing) { - bool FoundDynamicIndexing = CheckForDynamicIndexing(HlslOP, Ctx, DM); + bool FoundDynamicIndexing = false; + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&FoundDynamicIndexing](bool, Instruction *, Value *) { + FoundDynamicIndexing = true; + return false; + }); if (FoundDynamicIndexing) { if (OSOverride != nullptr) { diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index 69385ae048..65d9a660cc 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -512,6 +512,90 @@ unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, } } +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function &Visitor) { + OP *HlslOP = DM.GetOP(); + LLVMContext &Ctx = DM.GetModule()->getContext(); + + for (llvm::Function &F : DM.GetModule()->functions()) { + if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { + if (F.hasName()) { + if (F.getName().find("createHandleForLib") != StringRef::npos) { + auto FunctionUses = F.uses(); + for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { + auto &FunctionUse = *FI++; + auto FunctionUser = FunctionUse.getUser(); + auto instruction = cast(FunctionUser); + Value *resourceLoad = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleForLibResOpIdx); + if (auto *load = cast(resourceLoad)) { + auto *resOrGep = load->getOperand(0); + if (auto *gep = dyn_cast(resOrGep)) { + if (!Visitor(DxilMDHelper::IsMarkedNonUniform(gep), load, + gep->getOperand(2))) { + return; + } + } + } + } + } + } + } + } + + auto CreateHandleFn = + HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFn->user_begin(); + FI != CreateHandleFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = + instruction->getOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandle 
createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromBindingFn->user_begin(); + FI != CreateHandleFromBindingFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromBindingResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromBinding createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromHeapFn->user_begin(); + FI != CreateHandleFromHeapFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromHeapHeapIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromHeap createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } +} + #ifdef PIX_DEBUG_DUMP_HELPER static int g_logIndent = 0; diff --git a/lib/DxilPIXPasses/PixPassHelpers.h b/lib/DxilPIXPasses/PixPassHelpers.h index 4cd0e1a549..d7b0b40af8 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.h +++ b/lib/DxilPIXPasses/PixPassHelpers.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include "dxc/DXIL/DxilModule.h" @@ -16,7 +17,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -//#define PIX_DEBUG_DUMP_HELPER +// #define PIX_DEBUG_DUMP_HELPER #ifdef PIX_DEBUG_DUMP_HELPER #include "dxc/Support/Global.h" #endif @@ -82,4 +83,8 @@ void ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction( llvm::Instruction *Instr, llvm::Value *newValue, llvm::Type *newType); unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, unsigned UpStreamSVPosRow); +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function + &Visitor); } // namespace PIXPassHelpers diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index b97aa70c05..e337d2951c 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -153,6 +153,10 @@ class PixTest : public ::testing::Test { TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + TEST_METHOD(NonUniformResourceIndex_Resource) + TEST_METHOD(NonUniformResourceIndex_DescriptorHeap) + TEST_METHOD(NonUniformResourceIndex_Raytracing) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -444,6 +448,11 @@ class PixTest : public ::testing::Test { std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob); CComPtr RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob); CComPtr RunDxilPIXDXRInvocationsLog(IDxcBlob *blob); + std::vector + RunDxilNonUniformResourceIndexInstrumentation(IDxcBlob *blob, + std::string &outputText); + void TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult); void TestPixUAVCase(char const *hlsl, wchar_t const *model, wchar_t const *entry); std::string Disassemble(IDxcBlob *pProgram); @@ -671,6 +680,29 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { return pOptimizedModule; } +std::vector PixTest::RunDxilNonUniformResourceIndexInstrumentation( + IDxcBlob 
*blob, std::string &outputText) { + + CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::array Options = { + L"-opt-mod-passes", L"-dxil-dbg-value-to-dbg-declare", + L"-dxil-annotate-with-virtual-regs", + L"-hlsl-dxil-non-uniform-resource-index-instrumentation"}; + + CComPtr pOptimizedModule; + CComPtr pText; + VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + + outputText = BlobToUtf8(pText); + + const std::string disassembly = Disassemble(pOptimizedModule); + return Tokenize(disassembly, "\n"); +} + std::string PixTest::RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob) { CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); @@ -2983,6 +3015,193 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +uint32_t NuriGetWaveInstructionCount(const std::vector &lines) { + // This is the instruction we'll insert into the shader if we detect dynamic + // resource indexing + const char *const waveActiveAllEqual = "call i1 @dx.op.waveActiveAllEqual"; + + uint32_t instCount = 0; + for (const std::string &line : lines) { + instCount += line.find(waveActiveAllEqual) != std::string::npos; + } + return instCount; +} + +void PixTest::TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult) { + + for (const OptimizationChoice &choice : OptimizationChoices) { + const std::vector compilationOptions = {choice.Flag}; + + CComPtr compiledLib = + Compile(m_dllSupport, source, target, compilationOptions); + + std::string outputText; + const std::vector dxilLines = + RunDxilNonUniformResourceIndexInstrumentation(compiledLib, outputText); + + VERIFY_ARE_EQUAL(NuriGetWaveInstructionCount(dxilLines), expectedResult); + + bool foundDynamicIndexingNoNuri = false; + const std::vector outputTextLines = Tokenize(outputText, "\n"); + for (const std::string &line : outputTextLines) { + if (line.find("FoundDynamicIndexingNoNuri") != std::string::npos) { + foundDynamicIndexingNoNuri = true; + break; + } + } + + VERIFY_ARE_EQUAL((expectedResult != 0), foundDynamicIndexingNoNuri); + } +} + +TEST_F(PixTest, NonUniformResourceIndex_Resource) { + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint index = uv.x * uv.y; + return tex[index].Load(int3(0, 0, 0)); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x * uv.y; + return tex[NonUniformResourceIndex(i)].Load(int3(0, 0, 0)); +})x"; + + TestNuriCase(source, L"ps_6_0", 1); + TestNuriCase(sourceWithNuri, L"ps_6_0", 0); + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + TestNuriCase(source, L"ps_6_6", 1); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_DescriptorHeap) { + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[i]; + SamplerState dynResSampler = + SamplerDescriptorHeap[i]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + 
ResourceDescriptorHeap[NonUniformResourceIndex(i)]; + SamplerState dynResSampler = + SamplerDescriptorHeap[NonUniformResourceIndex(i)]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + TestNuriCase(source, L"ps_6_6", 2); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_Raytracing) { + + if (m_ver.SkipDxilVersion(1, 5)) { + return; + } + + const char *source = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[i][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[i][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[i1][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[i2][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + const char *sourceWithNuri = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[NonUniformResourceIndex(i1)][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[NonUniformResourceIndex(i2)][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + TestNuriCase(source, L"lib_6_5", 4); + TestNuriCase(sourceWithNuri, L"lib_6_5", 0); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 691c3ba58f..0008b752b1 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -6340,6 +6340,12 @@ def add_pass(name, type_name, doc, opts): "HLSL DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV", [{"n": "maxNumEntriesInLog", "t": "int", "c": 1}], ) + add_pass( + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "DxilNonUniformResourceIndexInstrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + [], + ) category_lib = "dxil_gen" From c940161bb3398ff988fafc343ed1623d4a3fad6c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:19:24 -0700 Subject: [PATCH 74/88] Bump cryptography from 43.0.1 to 44.0.1 in /utils/git (#7220) Bumps [cryptography](https://github.com/pyca/cryptography) from 43.0.1 to 44.0.1.
Changelog (sourced from cryptography's changelog):

44.0.1 - 2025-02-11

* Updated Windows, macOS, and Linux wheels to be compiled with OpenSSL 3.4.1.
* We now build ``armv7l`` ``manylinux`` wheels and publish them to PyPI.
* We now build ``manylinux_2_34`` wheels and publish them to PyPI.

44.0.0 - 2024-11-27

* BACKWARDS INCOMPATIBLE: Dropped support for LibreSSL < 3.9.
* Deprecated Python 3.7 support. Python 3.7 is no longer supported by the Python core team. Support for Python 3.7 will be removed in a future cryptography release.
* Updated Windows, macOS, and Linux wheels to be compiled with OpenSSL 3.4.0.
* macOS wheels are now built against the macOS 10.13 SDK. Users on older versions of macOS should upgrade, or they will need to build cryptography themselves.
* Enforce the RFC 5280 requirement that extended key usage extensions must not be empty.
* Added support for timestamp extraction to the cryptography.fernet.MultiFernet class.
* Relax the Authority Key Identifier requirements on root CA certificates during X.509 verification to allow fields permitted by RFC 5280 but forbidden by the CA/Browser BRs.
* Added support for cryptography.hazmat.primitives.kdf.argon2.Argon2id when using OpenSSL 3.2.0+.
* Added support for the cryptography.x509.Admissions certificate extension.
* Added basic support for PKCS7 decryption (including S/MIME 3.2) via pkcs7_decrypt_der, pkcs7_decrypt_pem, and pkcs7_decrypt_smime.

43.0.3 - 2024-10-18

* Fixed release metadata for ``cryptography-vectors``.

43.0.2 - 2024-10-18

* Fixed compilation when using LibreSSL 4.0.0.
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 utils/git/requirements_formatting.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt
index 06db8176c9..6f3e07dcf2 100644
--- a/utils/git/requirements_formatting.txt
+++ b/utils/git/requirements_formatting.txt
@@ -18,7 +18,7 @@ charset-normalizer==3.2.0
     # via requests
 click==8.1.7
     # via black
-cryptography==43.0.1
+cryptography==44.0.1
     # via pyjwt
 darker==1.7.2
     # via -r llvm/utils/git/requirements_formatting.txt.in

From 5d2fa929699b2a09a474796257b9709b1d48829f Mon Sep 17 00:00:00 2001
From: Chris B
Date: Wed, 9 Apr 2025 16:41:47 -0500
Subject: [PATCH 75/88] [SM6.9] Enable trivial native vector Dxil Operations
 plus a few (#7324)

This enables the generation of native vector DXIL Operations that are
"trivial", meaning they take only a single DXOp Call instruction to
implement, as well as a few others that either required only such a call
plus some LLVM operations or were of particular interest for other
reasons. This involves allowing the overloads by adding the vector
indication in hctdb, altering the lowering to maintain the vectors
instead of scalarizing them, and a few sundry changes to fix issues
along the way.

The "trivial" DXIL operations that return a different value from the
overload type had to be moved out of the way and given their own
lowering function so that the main function could generate vectors
conditional on the version and vector type. These will be added in a
later change.

While the long-vector-supporting intrinsics that weren't given this
treatment will continue to generate scalarized operations, some of them
needed some work as well. The dot product for float vectors longer than
4 had to take the integer fallback path, which required some small
modifications and a rename. Additionally, a heuristic for pow that
malfunctioned with too many elements had to have a limit placed on it.

Since the or()/and()/select() intrinsics translate directly to LLVM
ops, they can have their lowering scalarization removed; whatever
scalarization the current version still needs can be done by later
passes, as with other LLVM operators.

An issue with a special value used to represent unassigned dimensions
had to be addressed since new dimensions can exceed that value. It's
now MAX_INT.
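As a rough illustration of the intent (assumed entry point and DXIL mangling;
not excerpted from the tests added below):

  // With -T cs_6_9, a trivial elementwise intrinsic on a long vector keeps its
  // operand intact and lowers to a single native-vector DXIL call, roughly
  //   call <7 x float> @dx.op.unary.v7f32(i32 <Sqrt opcode>, <7 x float> %v)
  // whereas pre-6.9 targets emit one scalar @dx.op.unary.f32 call per element.
  vector<float, 7> ApplySqrt(vector<float, 7> v) { return sqrt(v); }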
Contributes to #7120, but I'd prefer to leave it open until all intrinsics are covered Primary work by @pow2clk Fixes #7297 & #7120 --------- Co-authored-by: Greg Roth --- lib/DXIL/DxilOperations.cpp | 140 +++--- lib/HLSL/HLOperationLower.cpp | 197 ++++---- tools/clang/lib/Sema/SemaHLSL.cpp | 8 +- .../hlsl/types/longvec-intrinsics.hlsl | 394 ++++++++++++++++ .../types/longvec-scalarized-intrinsics.hlsl | 115 +++++ ...ngvec-trivial-binary-float-intrinsics.hlsl | 69 +++ ...longvec-trivial-binary-int-intrinsics.hlsl | 116 +++++ ...longvec-trivial-scalarized-intrinsics.hlsl | 77 ++++ ...vec-trivial-tertiary-float-intrinsics.hlsl | 86 ++++ ...ngvec-trivial-tertiary-int-intrinsics.hlsl | 131 ++++++ ...ongvec-trivial-unary-float-intrinsics.hlsl | 83 ++++ .../longvec-trivial-unary-int-intrinsics.hlsl | 86 ++++ .../passes/longvec-intrinsics.hlsl | 186 ++++++++ .../CodeGenDXIL/passes/longvec-intrinsics.ll | 434 ++++++++++++++++++ utils/hct/hctdb.py | 24 +- 15 files changed, 1983 insertions(+), 163 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 0b4c7218d4..7047d9fe59 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -96,16 +96,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x7}}, - {{0x0}}}, // Overloads: hfd + {{0x407}}, + {{0x7}}}, // Overloads: hfd refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { @@ -459,12 +467,40 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, } } } -// Generates a DXIL operation over an overloaded type (Ty), returning a -// RetTy value; when Ty is a vector, it will replicate per-element operations -// into RetTy to rebuild it. + +// Creates a native vector call to for a "trivial" operation where only a single +// call instruction is needed. The overload and return types are the same vector +// type `Ty`. +// Utility objects `HlslOp` and `Builder` are used to create a call to the given +// `DxilFunc` with `RefArgs` arguments. +Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, + ArrayRef Args, Type *Ty, OP *OP, + IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) + return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); + return Builder.CreateCall(Func, Args); // Cannot add name to void. +} + +// Generates a DXIL operation with the overloaded type based on `Ty` and return +// type `RetTy`. 
When Ty is a vector, it will either generate per-element calls +// for each vector element and reconstruct the vector type from those results or +// operate on and return native vectors depending on vector size and the +// legality of the vector overload. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { + + // If supported and the overload type is a vector with more than 1 element, + // create a native vector operation. + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + OP::IsOverloadLegal(opcode, Ty)) { + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, + Builder); + } + + // Set overload type to the scalar type of `Ty` and generate call(s). Type *EltTy = Ty->getScalarType(); Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); @@ -484,20 +520,34 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } -Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src->getType(); +// Translate call that converts to a dxil unary operation with a different +// return type from the overload by passing the argument, explicit return type, +// and helper objects to the scalarizing unary dxil operation creation. +Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *, + bool &Translated) { + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = Src->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src}; + IRBuilder<> Builder(CI); + hlsl::OP *OP = &Helper.hlslOP; + Type *RetTy = CI->getType(); + Constant *OpArg = OP->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); + return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP, - Builder); +Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op, + IRBuilder<> &Builder) { + Type *Ty = Src->getType(); + + Constant *OpArg = Op->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; + + return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder); } Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, @@ -521,6 +571,9 @@ Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } +// Translate call that trivially converts to a dxil unary operation by passing +// argument, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -528,11 +581,13 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), - hlslOP, Builder); - return retVal; + + return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder); } +// Translate call that trivially converts to a dxil binary operation by passing +// arguments, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -547,6 +602,10 @@ Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } +// Translate call that trivially converts to a dxil trinary (aka tertiary) +// operation by passing arguments, return type, and helper objects to either +// scalarizing or native vector dxil operation creation depending on version +// and vector size. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -738,6 +797,12 @@ bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, } } + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. + Type *Ty = x->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) + return false; + APFloat powAPF = isa(pow) ? cast(pow)->getElementAsAPFloat(0) : // should be a splat value @@ -2019,7 +2084,7 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitHi = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); Constant *neg1 = Builder.getInt32(-1); @@ -2052,7 +2117,7 @@ Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitLo = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); return firstbitLo; } @@ -2431,17 +2496,22 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { - auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. 
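+// For example (illustrative), a 3-element dot product expands to:
+//   r = a[0] * b[0];  r = mad(a[1], b[1], r);  r = mad(a[2], b[2], r);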
+Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); - Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, + Value *Result; + if (Elt0->getType()->isFloatingPointTy()) + Result = Builder.CreateFMul(Elt0, Elt1); + else + Result = Builder.CreateMul(Elt0, Elt1); + for (unsigned Elt = 1; Elt < vecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(arg0, Elt); + Elt1 = Builder.CreateExtractElement(arg1, Elt); + Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, Builder); } @@ -2480,12 +2550,16 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { + Type *EltTy = Ty->getScalarType(); + if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); - } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); - } + + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_udot) + MadOpCode = DXIL::OpCode::UMad; + else if (EltTy->isFloatingPointTy()) + MadOpCode = DXIL::OpCode::FMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -3032,8 +3106,10 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (arg0Ty->getScalarType()->isFloatingPointTy()) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_umul) + MadOpCode = DXIL::OpCode::UMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -6150,20 +6226,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6171,20 +6235,8 @@ Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); 
IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateOr(x, y); } Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6194,21 +6246,8 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltCond = Builder.CreateExtractElement(cond, i); - Value *EltTrue = Builder.CreateExtractElement(t, i); - Value *EltFalse = Builder.CreateExtractElement(f, i); - Value *tmp = Builder.CreateSelect(EltCond, EltTrue, EltFalse); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateSelect(cond, t, f); } } // namespace @@ -6467,18 +6506,20 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, - {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, + {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, DXIL::OpCode::Countbits}, {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseX}, + {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineX}, - {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseY}, + {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineY}, {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 6796badcb6..72dd6d41aa 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6606,8 +6606,10 @@ bool HLSLExternalSource::MatchArguments( argTypes.clear(); const bool isVariadic = IsVariadicIntrinsicFunction(pIntrinsic); - static const UINT UnusedSize = 0xFF; - static const BYTE MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + static const uint32_t UnusedSize = std::numeric_limits::max(); + static const uint32_t MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + assert(MaxIntrinsicArgs < std::numeric_limits::max() && + "This should be a pretty small number"); #define CAB(cond, 
arg) \ { \ if (!(cond)) { \ @@ -6622,7 +6624,7 @@ bool HLSLExternalSource::MatchArguments( ArBasicKind ComponentType[MaxIntrinsicArgs]; // Component type for each argument, // AR_BASIC_UNKNOWN if unspecified. - UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32 + UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize // if unspecified. badArgIdx = MaxIntrinsicArgs; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..0b7f0d6b2f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -0,0 +1,394 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=2 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s + +// Test vector-enabled non-trivial intrinsics that take parameters of various types. + +RWByteAddressBuffer buf; +RWByteAddressBuffer ibuf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] 
[[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = ibuf.Load >(0); + vector sVec2 = ibuf.Load >(512); + vector sVec3 = ibuf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = ibuf.Load >(1025); + vector usVec2 = ibuf.Load >(1536); + vector usVec3 = ibuf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = ibuf.Load >(2049); + vector iVec2 = ibuf.Load >(2560); + vector iVec3 = ibuf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector 
uiVec1 = ibuf.Load >(3073); + vector uiVec2 = ibuf.Load >(3584); + vector uiVec3 = ibuf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = ibuf.Load >(4097); + vector lVec2 = ibuf.Load >(4608); + vector lVec3 = ibuf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = ibuf.Load >(5121); + vector ulVec2 = ibuf.Load >(5632); + vector ulVec3 = ibuf.Load >(6144); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 35, <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 36, <[[NUM]] x half> [[tmp]], <[[NUM]] x half> [[hvec3]]) ; FMin(a,b) + vector hRes = clamp(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 35, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 36, <[[NUM]] x float> [[tmp]], <[[NUM]] x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 35, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 36, <[[NUM]] x double> [[tmp]], <[[NUM]] x double> [[dvec3]]) ; FMin(a,b) + vector dRes = clamp(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 38, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[svec3]]) ; IMin(a,b) + vector sRes = clamp(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 39, <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 40, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[usvec3]]) ; UMin(a,b) + vector usRes = clamp(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // 
CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 38, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[ivec3]]) ; IMin(a,b) + vector iRes = clamp(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 39, <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 40, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[uivec3]]) ; UMin(a,b) + vector uiRes = clamp(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 38, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[lvec3]]) ; IMin(a,b) + vector lRes = clamp(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 39, <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 40, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[ulvec3]]) ; UMin(a,b) + vector ulRes = clamp(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], [[hvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x half> zeroinitializer, <[[NUM]] x half> [[fvec2]], [[fvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x float> zeroinitializer, <[[NUM]] x float> [[hvec1]], @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp]]) ; Exp(value) + hRes += exp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fmul fast <[[NUM]] x float> [[fvec1]], @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[hvec2]], [[hvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 7, <[[NUM]] x half> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x half> [[mul]], [[sub]] + hRes += smoothstep(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x float> [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 7, <[[NUM]] x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast 
<[[NUM]] x float> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fmul fast <[[NUM]] x half> [[hvec2]], [[fvec2]], [[hvec3]], [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <[[NUM]] x float> [[fvec1]] to <[[NUM]] x i32> + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], [[add]], [[shr]] to <[[NUM]] x float> + // CHECK: [[sel:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[i2f]], <[[NUM]] x float> zeroinitializer + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec2]] + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + fRes += lerp(fVec2, fVec3, fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x half> , [[hvec1]] + hRes += rcp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x float> , [[fvec1]] + fRes += rcp(fVec1); + + vector signs = 1; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(dVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i16> [[usvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(usVec2); + 
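// Note on the sign() checks in this test: for float and signed integer inputs, sign(x)
// appears to lower to two vector compares, (x > 0) and (x < 0), each zero-extended and
// then subtracted, i.e. zext(x > 0) - zext(x < 0). For unsigned inputs the negative
// compare is unnecessary, which is why those checks expect only a single icmp ne
// against zero followed by a zext.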
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: [[sub:%.*]] = sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i32> [[uivec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i64> [[ulvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(ulVec2); + + iRes += signs; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i16> [[svec1]], zeroinitializer + // CHECK: or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + sRes += or(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i16> [[svec3]], zeroinitializer + // CHECK: and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + sRes += and(sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]] + sRes += select(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + + ibuf.Store >(0, sRes); + ibuf.Store >(1024, usRes); + ibuf.Store >(2048, iRes); + ibuf.Store >(3072, uiRes); + ibuf.Store >(4096, lRes); + ibuf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..2ae3c92e85 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -0,0 +1,115 @@ +// RUN: %dxc -T lib_6_9 %s | FileCheck %s + +// Long vector tests for vec ops that scalarize to something more complex +// than a simple repetition of the same dx.op calls. 
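// For example, fmod() has no single DXIL opcode of its own: for a float vector it is
// expected to expand to vector-wide fdiv/fsub/FAbs/Frc operations (conceptually
// x - y * trunc(x / y)), and dot() to a chain of per-element FMad calls, as the checks
// below verify. These comments describe the expected shape of the expansion, not an
// exact instruction-for-instruction contract.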
+ +// CHECK-LABEL: test_atan2 +// CHECK: fdiv fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> %{{.*}}) ; Atan(value) +// CHECK: fadd fast <8 x float> %{{.*}}, %{{.*}}, +// CHECK: fcmp fast oeq <8 x float> +// CHECK: fcmp fast oge <8 x float> +// CHECK: fcmp fast olt <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> vec1, vector vec2) { + vec1 = atan2(vec1, vec2); +} + +// CHECK-LABEL: test_fmod +// CHECK: fdiv fast <8 x float> +// CHECK: fsub fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 6, <8 x float> %{{.*}}) ; FAbs(value) +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 22, <8 x float> %{{.*}}) ; Frc(value) + +// CHECK: fsub fast <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: fmul fast <8 x float> +export void test_fmod(inout vector vec1, vector vec2) { + vec1 = fmod(vec1, vec2); +} + +// CHECK-LABEL: test_ldexp +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +// CHECK: fmul fast <8 x float> + +export void test_ldexp(inout vector vec1, vector vec2) { + vec1 = ldexp(vec1, vec2); +} + + +// CHECK-LABEL: test_pow +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> %{{.*}}) ; Log(value) +// CHECK: fmul fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +export void test_pow(inout vector vec1, vector vec2) { + vec1 = pow(vec1, vec2); +} + +// CHECK-LABEL: test_modf +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 29, <8 x float> %{{.*}}) ; Round_z(value) +// CHECK: fsub fast <8 x float> +export void test_modf(inout vector vec1, vector vec2) { + vec1 = modf(vec1, vec2); +} + +// CHECK-LABEL: test_dot +// CHECK: [[el:%.*]] = extractelement <8 x float> +// CHECK: [[mul:%.*]] = fmul fast float [[el]] +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +export void test_dot(inout vector vec1, vector vec2) { + vec1 = dot(vec1, vec2); +} + +// CHECK-LABEL: test_any +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +export void test_any(vector vec1, inout vector bvec) { + bvec &= any(vec1); +} + +// CHECK-LABEL: test_all +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +export void test_all(vector vec1, inout vector bvec) { + bvec &= all(vec1); +} + +// CHECK-LABEL: test_WaveMatch +// call {{.*}} 
@dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +export uint4 test_WaveMatch(vector bvec) { + return WaveMatch(bvec); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl new file mode 100644 index 0000000000..02cad5b894 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled binary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + + // Test simple matching type overloads. 
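// FUNC and OP are injected by the RUN lines above (max pairs with opcode 35, FMax;
// min with 36, FMin). Each call below should compile to a single dx.op.binary using
// the full vector overload; the CHECK-NOTs guard against any fallback to per-element
// extract/insert scalarization.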
+ + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) + vector hRes = FUNC(hVec1, hVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) + vector fRes = FUNC(fVec1, fVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) + vector dRes = FUNC(dVec1, dVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl new file mode 100644 index 0000000000..994246b753 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled binary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. 
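// Storing the macro-defined OP and UOP constants to the buffer gives FileCheck a
// concrete i32 to bind as [[OP]] and [[UOP]], so one test body covers both RUN
// configurations: the signed overloads should select [[OP]] (37 IMax / 38 IMin) and
// the unsigned overloads [[UOP]] (39 UMax / 40 UMin).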
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1024); + vector usVec2 = buf.Load >(1536); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2048); + vector iVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3072); + vector uiVec2 = buf.Load >(3584); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4096); + vector lVec2 = buf.Load >(4608); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // 
CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5120); + vector ulVec2 = buf.Load >(5632); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) + vector sRes = FUNC(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) + vector usRes = FUNC(usVec1, usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) + vector iRes = FUNC(iVec1, iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) + vector uiRes = FUNC(uiVec1, uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) + vector lRes = FUNC(lVec1, lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) + vector ulRes = FUNC(ulVec1, ulVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..6ebb511b00 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -0,0 +1,77 @@ +// The binary part of some of these is all just a vector math ops with as many unary dxops as elements. +// These will have apparent mismatches between the ARITY define and the check prefix. 
+ +// RUN: %dxc -DFUNC=f16tof32 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=f32tof16 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=isfinite -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isinf -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isnan -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossDiagonal -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=WaveActiveBitAnd -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitOr -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitXor -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMin -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMax -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitAnd -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitOr -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitXor -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixProduct -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixSum -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneFirst -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveAllEqual -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE + +#ifndef TYPE +#define TYPE float +#endif + +#if ARITY == 1 +#define CALLARGS(x,y,z) x +#elif ARITY == 2 +#define CALLARGS(x,y,z) x, y +#elif ARITY == 3 +#define CALLARGS(x,y,z) x, y, z +// ARITY 4 is used for 1 vec + scalar +#elif ARITY == 4 +#define CALLARGS(x,y,z) x, i +// ARITY 5 is used for 1 vec + uint4 mask for wavemultiprefix* +#elif ARITY == 5 +#define CALLARGS(x,y,z) x, m +#endif + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + 
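// ARITY only controls how CALLARGS assembles the call (1-3 vector arguments, 4 for a
// vector plus the scalar lane index i, 5 for a vector plus the uint4 mask m used by
// the WaveMultiPrefix* functions); the check prefix independently selects which dx.op
// family [[DXOP]] is bound to. Each RUN binds [[DXOP]] once and the seven generic
// CHECK lines that follow then require eight matching calls in total, which lines up
// with the eight-element vectors the test loads and suggests these intrinsics are
// still expanded one element at a time rather than taking the whole vector.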
+float4 main(uint i : SV_PrimitiveID, uint4 m : M) : SV_Target { + vector arg1 = rbuf.Load< vector >(i++*32); + vector arg2 = rbuf.Load< vector >(i++*32); + vector arg3 = rbuf.Load< vector >(i++*32); + + // UNARY: call {{.*}} [[DXOP:@dx.op.unary]] + // BINARY: call {{.*}} [[DXOP:@dx.op.binary]] + // TERTIARY: call {{.*}} [[DXOP:@dx.op.tertiary]] + // LEGACY: call {{.*}} [[DXOP:@dx.op.legacy]] + // SPECFLT: call {{.*}} [[DXOP:@dx.op.isSpecialFloat]] + // QUAD: call {{.*}} [[DXOP:@dx.op.quad]] + // WAVE: call {{.*}} [[DXOP:@dx.op.wave]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + + vector ret = FUNC(CALLARGS(arg1, arg2, arg3)); + return float4(ret[0] + ret[1], ret[2] + ret[3], ret[4] + ret[5], ret[6] + ret[7]); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl new file mode 100644 index 0000000000..e32ebc1db2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled ternary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +// Given that all we have at the moment are fmad and fma and the latter only takes doubles, +// fma is tacked on as an additional check. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.tertiary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]], <[[NUM]] x half> [[hvec3]]) + vector hRes = FUNC(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.tertiary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]], <[[NUM]] x float> [[fvec3]]) + vector fRes = FUNC(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes = FUNC(dVec1, dVec2, dVec3); + + // Tacked on fma() check since it only takes doubles. 
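// fma() has its own DXIL opcode (47, checked literally below) with only a double
// overload, so it is exercised here against the same double vectors instead of
// getting its own parameterized RUN line.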
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 47, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes2 = fma(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + buf.Store >(5120, dRes2); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl new file mode 100644 index 0000000000..50f98715e4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl @@ -0,0 +1,131 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled tertiary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + vector sVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1025); + vector usVec2 = buf.Load >(1536); + vector usVec3 = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2049); + vector iVec2 = buf.Load >(2560); + vector iVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3073); + vector uiVec2 = buf.Load >(3584); + vector uiVec3 = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4097); + vector lVec2 = buf.Load >(4608); + vector lVec3 = buf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5121); + vector ulVec2 = buf.Load >(5632); + vector ulVec3 = buf.Load >(6144); + + // Test simple matching type overloads. 
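// With -DFUNC=mad, [[OP]] (48) should be chosen for the signed overloads and
// [[UOP]] (49) for the unsigned ones, presumably the IMad and UMad opcodes, each
// emitted as a single dx.op.tertiary call on the full vector.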
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]) + vector sRes = FUNC(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]], <[[NUM]] x i16> [[usvec3]]) + vector usRes = FUNC(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]], <[[NUM]] x i32> [[ivec3]]) + vector iRes = FUNC(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]], <[[NUM]] x i32> [[uivec3]]) + vector uiRes = FUNC(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]], <[[NUM]] x i64> [[lvec3]]) + vector lRes = FUNC(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]], <[[NUM]] x i64> [[ulvec3]]) + vector ulRes = FUNC(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl new file mode 100644 index 0000000000..91ab631a7e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types 
-DFUNC=cosh -DOP=18 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec = buf.Load >(1024); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec]]) + vector hRes = FUNC(hVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]]) + vector fRes = FUNC(fVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(1024, fRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl new file mode 100644 index 0000000000..ef0b250745 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // Capture opcode number. 
+  // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]]
+  buf.Store(999, OP);
+
+  // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0
+  // CHECK: [[svec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0
+  vector<int16_t, NUM> sVec = buf.Load<vector<int16_t, NUM> >(0);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024
+  // CHECK: [[usvec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0
+  vector<uint16_t, NUM> usVec = buf.Load<vector<uint16_t, NUM> >(1024);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048
+  // CHECK: [[ivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  vector<int, NUM> iVec = buf.Load<vector<int, NUM> >(2048);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072
+  // CHECK: [[uivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0
+  vector<uint, NUM> uiVec = buf.Load<vector<uint, NUM> >(3072);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096
+  // CHECK: [[lvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0
+  vector<int64_t, NUM> lVec = buf.Load<vector<int64_t, NUM> >(4096);
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120
+  // CHECK: [[ulvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0
+  vector<uint64_t, NUM> ulVec = buf.Load<vector<uint64_t, NUM> >(5120);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec]])
+  vector<int16_t, NUM> sRes = FUNC(sVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[usvec]])
+  vector<uint16_t, NUM> usRes = FUNC(usVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec]])
+  vector<int, NUM> iRes = FUNC(iVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[uivec]])
+  vector<uint, NUM> uiRes = FUNC(uiVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec]])
+  vector<int64_t, NUM> lRes = FUNC(lVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[ulvec]])
+  vector<uint64_t, NUM> ulRes = FUNC(ulVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  buf.Store<vector<int16_t, NUM> >(0, sRes);
+  buf.Store<vector<uint16_t, NUM> >(1024, usRes);
+  buf.Store<vector<int, NUM> >(2048, iRes);
+  buf.Store<vector<uint, NUM> >(3072, uiRes);
+  buf.Store<vector<int64_t, NUM> >(4096, lRes);
+  buf.Store<vector<uint64_t, NUM> >(5120, ulRes);
+}
diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl
new file mode 100644
index 0000000000..11d705305d
--- /dev/null
+++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl
@@ -0,0 +1,186 @@
+// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s
+
+// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll.
+// Some targetted filecheck testing as an incidental. + +RWStructuredBuffer > hBuf; +RWStructuredBuffer > fBuf; +RWStructuredBuffer > dBuf; + +RWStructuredBuffer > bBuf; +RWStructuredBuffer > uBuf; +RWStructuredBuffer > lBuf; + +[numthreads(8,1,1)] +void main() { + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + vector fVec1 = fBuf[11]; + vector fVec2 = fBuf[12]; + vector fVec3 = fBuf[13]; + + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + vector hVec1 = hBuf[14]; + vector hVec2 = hBuf[15]; + vector hVec3 = hBuf[16]; + + // CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> hRes = step(hVec1, hVec2); + + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <13 x half> [[tmp]], [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
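+  // (These lower to plain LLVM vector arithmetic and bit manipulation rather
+  // than dx.op.* calls.)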
+ + // CHECK: fmul fast <13 x float> [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], [[add]], [[shr]] to <13 x float> + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + vector uVec1 = uBuf[17]; + vector uVec2 = uBuf[18]; + + vector signs = 1; + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> + signs *= sign(uVec2); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + vector lVec1 = lBuf[19]; + vector lVec2 = lBuf[20]; + + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> + // CHECK: [[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + vector uRes = signs; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + vector bVec1 = bBuf[21]; + vector bVec2 = bBuf[22]; + vector bVec3 = bBuf[23]; + + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] + uRes += or(bVec1, bVec2); + + // CHECK: [[bvec3:%.*]] = icmp ne <13 
x i32> [[vec3]], zeroinitializer + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] + uRes += and(bVec2, bVec3); + + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] + vector lRes = select(bVec3, lVec1, lVec2); + + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] + // CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) + fRes += dot(fVec1, fVec2); + + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) + fRes += atan(fVec1); + + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) + uRes += min(uVec1, uVec2); + + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) + fRes += mad(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + vector dVec1 = dBuf[24]; + vector dVec2 = dBuf[25]; + vector dVec3 = dBuf[26]; + + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) + vector dRes = fma(dVec1, dVec2, dVec3); + + hBuf[0] = hRes; + fBuf[0] = fRes; + dBuf[0] = dRes; + uBuf[0] = uRes; + lBuf[0] = lRes; +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll 
b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll new file mode 100644 index 0000000000..8f9dcbbdbc --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll @@ -0,0 +1,434 @@ +; RUN: %dxopt %s -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <7 x half> } +%"class.RWStructuredBuffer >" = type { <7 x float> } +%"class.RWStructuredBuffer >" = type { <7 x double> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i64> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" = external global %"class.RWStructuredBuffer >", align 2 +@"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 + +; CHECK-LABEL: define void @main() +define void @main() #0 { +bb: + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + ; CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + ; CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + ; CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + + %exp = alloca <7 x float>, align 4 + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:23 col:30 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp) ; line:23 col:30 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:23 col:30 + %tmp3 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 11) ; line:23 col:30 + %tmp4 = load <7 x float>, <7 x float>* %tmp3 ; line:23 col:30 + %tmp5 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:24 col:30 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp5) ; line:24 col:30 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:24 col:30 + %tmp8 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp7, i32 12) ; line:24 col:30 + %tmp9 = load <7 x float>, <7 x float>* %tmp8 ; line:24 col:30 + %tmp10 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:25 col:30 + %tmp11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp10) ; line:25 col:30 + %tmp12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:25 col:30 + %tmp13 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp12, i32 13) ; line:25 col:30 + %tmp14 = load <7 x float>, <7 x float>* %tmp13 ; line:25 col:30 + + ; Clamp operation. + ; CHECK: [[max:%.*]] = call <7 x float> @dx.op.binary.v7f32(i32 35, <7 x float> [[fvec1]], <7 x float> [[fvec2]]) + ; CHECK: call <7 x float> @dx.op.binary.v7f32(i32 36, <7 x float> [[max]], <7 x float> [[fvec3]]) + %tmp15 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 119, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:29 col:29 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + ; CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + ; CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + ; CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + %tmp16 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:37 col:34 + %tmp17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp16) ; line:37 col:34 + %tmp18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp17, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:37 col:34 + %tmp19 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp18, i32 14) ; line:37 col:34 + %tmp20 = load <7 x half>, <7 x half>* %tmp19 ; line:37 col:34 + %tmp21 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:38 col:34 + %tmp22 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp21) ; line:38 col:34 + %tmp23 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp22, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:38 col:34 + %tmp24 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp23, i32 15) ; line:38 col:34 + %tmp25 = load <7 x half>, <7 x half>* %tmp24 ; line:38 col:34 + %tmp26 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:39 col:34 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp26) ; line:39 col:34 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:39 col:34 + %tmp29 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp28, i32 16) ; line:39 col:34 + %tmp30 = load <7 x half>, <7 x half>* %tmp29 ; line:39 col:34 + + ; Step operation. + ; CHECK: [[cmp:%.*]] = fcmp fast olt <7 x half> [[hvec2]], [[hvec1]] + ; CHECK: select <7 x i1> [[cmp]], <7 x half> zeroinitializer, <7 x half> + %tmp31 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32 192, <7 x half> %tmp20, <7 x half> %tmp25) ; line:43 col:33 + + ; Exp operation. + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> , [[fvec1]] + ; CHECK call <7 x float> @dx.op.unary.v7f32(i32 21, <7 x float> [[mul]]) + %tmp32 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 139, <7 x float> %tmp4) ; line:47 col:11 + %tmp33 = fadd <7 x float> %tmp15, %tmp32 ; line:47 col:8 + + ; Log operation. + ; CHECK: [[log:%.*]] = call <7 x half> @dx.op.unary.v7f16(i32 23, <7 x half> [[hvec1]]) + ; CHECK: fmul fast <7 x half> , [[log]] + %tmp34 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32 159, <7 x half> %tmp20) ; line:51 col:11 + %tmp35 = fadd <7 x half> %tmp31, %tmp34 ; line:51 col:8 + + ; Smoothstep operation. + ; CHECK: [[sub1:%.*]] = fsub fast <7 x float> [[fvec2]], [[fvec1]] + ; CHECK: [[sub2:%.*]] = fsub fast <7 x float> [[fvec3]], [[fvec1]] + ; CHECK: [[div:%.*]] = fdiv fast <7 x float> [[sub2]], [[sub1]] + ; CHECK: [[sat:%.*]] = call <7 x float> @dx.op.unary.v7f32(i32 7, <7 x float> [[div]]) + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], + ; CHECK: [[sub:%.*]] = fsub fast <7 x float> , [[mul]] + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], [[sub]] + ; CHECK: fmul fast <7 x float> %Saturate, [[mul]] + %tmp36 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 189, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:61 col:11 + %tmp37 = fadd <7 x float> %tmp33, %tmp36 ; line:61 col:8 + + ; Radians operation. + ; CHECK: fmul fast <7 x float> , [[fvec3]] + %tmp38 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 176, <7 x float> %tmp14) ; line:66 col:11 + %tmp39 = fadd <7 x float> %tmp37, %tmp38 ; line:66 col:8 + store <7 x float> %tmp14, <7 x float>* %exp, align 4 ; line:77 col:22 + + ; Frexp operation. 
+ ; CHECK: [[cmp:%.*]] = fcmp fast une <7 x float> [[fvec1]], zeroinitializer + ; CHECK: [[ext:%.*]] = sext <7 x i1> [[cmp]] to <7 x i32> + ; CHECK: [[bct:%.*]] = bitcast <7 x float> [[fvec1]] to <7 x i32> + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[add:%.*]] = add <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[add]], [[ext]] + ; CHECK: [[shr:%.*]] = ashr <7 x i32> [[and]], + ; CHECK: [[i2f:%.*]] = sitofp <7 x i32> [[shr]] to <7 x float> + ; CHECK: store <7 x float> [[i2f]], <7 x float>* %exp + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[or:%.*]] = or <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[or]], [[ext]] + ; CHECK: bitcast <7 x i32> [[and]] to <7 x float> + %tmp41 = call <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32 150, <7 x float> %tmp4, <7 x float>* %exp) ; line:78 col:11 + %tmp42 = fadd <7 x float> %tmp39, %tmp41 ; line:78 col:8 + %tmp43 = load <7 x float>, <7 x float>* %exp, align 4 ; line:79 col:11 + %tmp44 = fadd <7 x float> %tmp42, %tmp43 ; line:79 col:8 + + ; Lerp operation. + ; CHECK: [[sub:%.*]] = fsub fast <7 x half> [[hvec3]], [[hvec2]] + ; CHECK: fmul fast <7 x half> [[hvec1]], [[sub]] + %tmp45 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32 157, <7 x half> %tmp25, <7 x half> %tmp30, <7 x half> %tmp20) ; line:83 col:11 + %tmp46 = fadd <7 x half> %tmp35, %tmp45 ; line:83 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + ; CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + ; CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + %tmp47 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:90 col:29 + %tmp48 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp47) ; line:90 col:29 + %tmp49 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp48, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:90 col:29 + %tmp50 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp49, i32 17) ; line:90 col:29 + %tmp51 = load <7 x i32>, <7 x i32>* %tmp50 ; line:90 col:29 + %tmp52 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:91 col:29 + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp52) ; line:91 col:29 + %tmp54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:91 col:29 + %tmp55 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 18) ; line:91 col:29 + %tmp56 = load <7 x i32>, <7 x i32>* 
%tmp55 ; line:91 col:29 + + ; Unsigned int sign operation. + ; CHECK: [[cmp:%.*]] = icmp ne <7 x i32> [[uvec2]], zeroinitializer + ; CHECK: zext <7 x i1> [[cmp]] to <7 x i32> + %tmp57 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32 355, <7 x i32> %tmp56) ; line:96 col:12 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + ; CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + ; CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + %tmp58 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:102 col:32 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp58) ; line:102 col:32 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:102 col:32 + %tmp61 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp60, i32 19) ; line:102 col:32 + %tmp62 = load <7 x i64>, <7 x i64>* %tmp61 ; line:102 col:32 + %tmp63 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:103 col:32 + %tmp64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp63) ; line:103 col:32 + %tmp65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp64, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:103 col:32 + %tmp66 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp65, i32 20) ; line:103 col:32 + %tmp67 = load <7 x i64>, <7 x i64>* %tmp66 ; line:103 col:32 + + ; Signed int sign operation. 
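+  ; For signed integers, sign(x) is computed as zext(0 < x) - zext(x < 0),
+  ; giving -1, 0, or 1 per lane.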
+ ; CHECK: [[lt1:%.*]] = icmp slt <7 x i64> zeroinitializer, [[lvec2]] + ; CHECK: [[lt2:%.*]] = icmp slt <7 x i64> [[lvec2]], zeroinitializer + ; CHECK: [[ilt1:%.*]] = zext <7 x i1> [[lt1]] to <7 x i32> + ; CHECK: [[ilt2:%.*]] = zext <7 x i1> [[lt2]] to <7 x i32> + ; CHECK: sub <7 x i32> [[ilt1]], [[ilt2]] + %tmp68 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32 185, <7 x i64> %tmp67) ; line:110 col:12 + %tmp69 = mul <7 x i32> %tmp57, %tmp68 ; line:110 col:9 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec1:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec2:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec3:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + %tmp70 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:126 col:29 + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp70) ; line:126 col:29 + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:126 col:29 + %tmp73 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 21) ; line:126 col:29 + %tmp74 = load <7 x i32>, <7 x i32>* %tmp73 ; line:126 col:29 + %tmp75 = icmp ne <7 x i32> %tmp74, zeroinitializer ; line:126 col:29 + %tmp76 = zext <7 x i1> %tmp75 to <7 x i32> ; line:126 col:21 + %tmp77 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:127 col:29 + %tmp78 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp77) ; line:127 col:29 + %tmp79 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp78, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:127 col:29 + %tmp80 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp79, i32 22) ; line:127 col:29 + %tmp81 = load <7 x i32>, <7 x i32>* %tmp80 ; line:127 col:29 + %tmp82 = icmp ne <7 x i32> %tmp81, zeroinitializer ; line:127 col:29 + %tmp83 = zext <7 x i1> %tmp82 to <7 x i32> ; line:127 col:21 + %tmp84 = load %"class.RWStructuredBuffer >", 
%"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:128 col:29 + %tmp85 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp84) ; line:128 col:29 + %tmp86 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp85, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:128 col:29 + %tmp87 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp86, i32 23) ; line:128 col:29 + %tmp88 = load <7 x i32>, <7 x i32>* %tmp87 ; line:128 col:29 + %tmp89 = icmp ne <7 x i32> %tmp88, zeroinitializer ; line:128 col:29 + %tmp90 = zext <7 x i1> %tmp89 to <7 x i32> ; line:128 col:21 + + + ; Or() operation. + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: [[bvec1:%.*]] = icmp ne <7 x i32> [[vec1]], zeroinitializer + ; CHECK: or <7 x i1> [[bvec1]], [[bvec2]] + %tmp91 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:133 col:21 + %tmp92 = icmp ne <7 x i32> %tmp76, zeroinitializer ; line:133 col:14 + %tmp93 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 169, <7 x i1> %tmp92, <7 x i1> %tmp91) ; line:133 col:11 + %tmp94 = zext <7 x i1> %tmp93 to <7 x i32> ; line:133 col:11 + %tmp95 = add <7 x i32> %tmp69, %tmp94 ; line:133 col:8 + + ; And() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: and <7 x i1> [[bvec2]], [[bvec3]] + %tmp96 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:137 col:22 + %tmp97 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:137 col:15 + %tmp98 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 106, <7 x i1> %tmp97, <7 x i1> %tmp96) ; line:137 col:11 + %tmp99 = zext <7 x i1> %tmp98 to <7 x i32> ; line:137 col:11 + %tmp100 = add <7 x i32> %tmp95, %tmp99 ; line:137 col:8 + + ; Select() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: select <7 x i1> [[bvec3]], <7 x i64> [[lvec1]], <7 x i64> [[lvec2]] + %tmp101 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:140 col:38 + %tmp102 = call <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32 184, <7 x i1> %tmp101, <7 x i64> %tmp62, <7 x i64> %tmp67) ; line:140 col:31 + %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11 + + ; Dot operation. 
+ ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0 + ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]] + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1 + ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2 + ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3 + ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4 + ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5 + ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6 + ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]]) + %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11 + %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11 + %tmp106 = fadd <7 x float> %tmp44, %tmp105 ; line:152 col:8 + + ; Atan operation. + ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 17, <7 x float> [[fvec1]]) + %tmp107 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 116, <7 x float> %tmp4) ; line:155 col:11 + %tmp108 = fadd <7 x float> %tmp106, %tmp107 ; line:155 col:8 + + ; Min operation. + ; CHECK: call <7 x i32> @dx.op.binary.v7i32(i32 40, <7 x i32> [[uvec1]], <7 x i32> [[uvec2]]) + %tmp109 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32 353, <7 x i32> %tmp51, <7 x i32> %tmp56) ; line:158 col:11 + %tmp110 = add <7 x i32> %tmp100, %tmp109 ; line:158 col:8 + + ; Mad operation. 
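+  ; In contrast to dot, mad remains a single FMad call with vector operands.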
+ ; CHECK: call <7 x float> @dx.op.tertiary.v7f32(i32 46, <7 x float> [[fvec1]], <7 x float> [[fvec2]], <7 x float> [[fvec3]]) + %tmp111 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 162, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:161 col:11 + %tmp112 = fadd <7 x float> %tmp108, %tmp111 ; line:161 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + ; CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + ; CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + ; CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:169 col:31 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:169 col:31 + %tmp115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:169 col:31 + %tmp116 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 24) ; line:169 col:31 + %tmp117 = load <7 x double>, <7 x double>* %tmp116 ; line:169 col:31 + %tmp118 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:170 col:31 + %tmp119 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp118) ; line:170 col:31 + %tmp120 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp119, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:170 col:31 + %tmp121 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp120, i32 25) ; line:170 col:31 + %tmp122 = load <7 x double>, <7 x double>* %tmp121 ; line:170 col:31 + %tmp123 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:171 col:31 + %tmp124 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp123) ; line:171 col:31 + %tmp125 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp124, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:171 col:31 + %tmp126 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp125, 
i32 26) ; line:171 col:31 + %tmp127 = load <7 x double>, <7 x double>* %tmp126 ; line:171 col:31 + + ; FMA operation. + ; CHECK: call <7 x double> @dx.op.tertiary.v7f64(i32 47, <7 x double> [[dvec1]], <7 x double> [[dvec2]], <7 x double> [[dvec3]]) + %tmp128 = call <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32 147, <7 x double> %tmp117, <7 x double> %tmp122, <7 x double> %tmp127) ; line:174 col:30 + %tmp129 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:176 col:3 + %tmp130 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp129) ; line:176 col:3 + %tmp131 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp130, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:176 col:3 + %tmp132 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp131, i32 0) ; line:176 col:3 + store <7 x half> %tmp46, <7 x half>* %tmp132 ; line:176 col:11 + %tmp133 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:177 col:3 + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp133) ; line:177 col:3 + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp134, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:177 col:3 + %tmp136 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp135, i32 0) ; line:177 col:3 + store <7 x float> %tmp112, <7 x float>* %tmp136 ; line:177 col:11 + %tmp137 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:178 col:3 + %tmp138 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp137) ; line:178 col:3 + %tmp139 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp138, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:178 col:3 + %tmp140 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp139, i32 0) ; line:178 col:3 + store <7 x double> %tmp128, <7 x double>* %tmp140 ; line:178 col:11 + %tmp141 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:179 col:3 + %tmp142 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp141) ; line:179 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, 
%dx.types.Handle %tmp142, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:179 col:3 + %tmp144 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp143, i32 0) ; line:179 col:3 + store <7 x i32> %tmp110, <7 x i32>* %tmp144 ; line:179 col:11 + %tmp145 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:180 col:3 + %tmp146 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp145) ; line:180 col:3 + %tmp147 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp146, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:180 col:3 + %tmp148 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp147, i32 0) ; line:180 col:3 + store <7 x i64> %tmp102, <7 x i64>* %tmp148 ; line:180 col:11 + ret void ; line:181 col:1 +} + +declare <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>, <7 x float>) #1 +declare <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>) #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32, <7 x float>) #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32, <7 x half>) #1 +declare <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32, <7 x float>, <7 x float>*) #0 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>, <7 x half>) #1 +declare <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32, <7 x i32>) #1 +declare <7 x i64>* 
@"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32, <7 x i64>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32, <7 x i1>, <7 x i1>) #1 +declare <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32, <7 x i1>, <7 x i64>, <7 x i64>) #1 +declare float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>) #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32, <7 x i32>, <7 x i32>) #1 +declare <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32, <7 x double>, <7 x double>, <7 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !36} +!dx.entryPoints = !{!40} +!dx.fnprops = !{!52} +!dx.options = !{!53, !54} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"cs", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer >" undef, !6, %"class.RWStructuredBuffer >" undef, !11, %"class.RWStructuredBuffer >" undef, !16, %"class.RWStructuredBuffer >" undef, !21, %"class.RWStructuredBuffer >" undef, !26, %"class.RWStructuredBuffer >" undef, !31} +!6 = !{i32 14, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 8, i32 13, i32 7} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, <7 x half> undef} +!11 = !{i32 28, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 7} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, <7 x float> undef} +!16 = !{i32 56, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 10, i32 13, i32 7} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, <7 x double> undef} +!21 = !{i32 28, !22, !23} +!22 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 1, i32 13, i32 7} +!23 = !{i32 0, !24} +!24 = !{!25} +!25 = !{i32 0, <7 x i1> undef} +!26 = !{i32 28, !27, !28} +!27 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5, i32 13, i32 7} +!28 = !{i32 0, !29} +!29 = !{!30} +!30 = !{i32 0, <7 x i32> undef} +!31 = !{i32 56, !32, !33} +!32 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 6, i32 13, 
i32 7} +!33 = !{i32 0, !34} +!34 = !{!35} +!35 = !{i32 0, <7 x i64> undef} +!36 = !{i32 1, void ()* @main, !37} +!37 = !{!38} +!38 = !{i32 1, !39, !39} +!39 = !{} +!40 = !{void ()* @main, !"main", null, !41, null} +!41 = !{null, !42, null, null} +!42 = !{!43, !45, !47, !49, !50, !51} +!43 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A", !"hBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !44} +!44 = !{i32 1, i32 14} +!45 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A", !"fBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!46 = !{i32 1, i32 28} +!47 = !{i32 2, %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A", !"dBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!48 = !{i32 1, i32 56} +!49 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A", !"bBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!50 = !{i32 4, %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A", !"uBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!51 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A", !"lBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!52 = !{void ()* @main, i32 5, i32 8, i32 1, i32 1} +!53 = !{i32 0} +!54 = !{i32 -1} +!59 = !{!60, !60, i64 0} +!60 = !{!"omnipotent char", !61, i64 0} +!61 = !{!"Simple C/C++ TBAA"} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 0008b752b1..a6cc52df1a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1503,7 +1503,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1537,7 +1537,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1554,7 +1554,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the reverse bit pattern of the input value", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1601,7 +1601,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1619,7 +1619,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1674,7 +1674,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "hfd", + "hfd<", "rn", [ db_dxil_param( @@ -1691,7 +1691,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -1715,7 +1715,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs an integral " + i, - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "the operation result"), @@ -2608,7 +2608,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2626,7 +2626,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2644,7 
+2644,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2662,7 +2662,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( From 90bfb669fd98e35993cc11bf3f0ae04b2194196e Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 10 Apr 2025 18:21:09 +0200 Subject: [PATCH 76/88] [SER] 'reordercoherent' HLSL attribute and DXIL encoding (#7250) Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md 'reordercoherent' encoding hlsl-specs PR: https://github.com/microsoft/hlsl-specs/pull/453 DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilMetadataHelper.h | 1 + include/dxc/DXIL/DxilResource.h | 3 + include/dxc/DXIL/DxilResourceProperties.h | 3 +- .../dxc/DxilContainer/RDAT_LibraryTypes.inl | 1 + lib/DXIL/DxilMetadataHelper.cpp | 10 ++ lib/DXIL/DxilResource.cpp | 8 +- lib/DXIL/DxilResourceProperties.cpp | 3 + lib/DxilContainer/DxilContainerAssembler.cpp | 3 + lib/DxilPIXPasses/PixPassHelpers.cpp | 1 + lib/HLSL/DxilCondenseResources.cpp | 4 +- lib/HLSL/DxilGenerationPass.cpp | 1 + lib/HLSL/DxilPatchShaderRecordBindings.cpp | 1 + lib/HLSL/HLModule.cpp | 1 + tools/clang/include/clang/AST/HlslTypes.h | 1 + tools/clang/include/clang/AST/Type.h | 3 +- tools/clang/include/clang/Basic/Attr.td | 6 ++ .../clang/Basic/DiagnosticSemaKinds.td | 20 +++- .../clang/include/clang/Basic/TokenKinds.def | 1 + tools/clang/include/clang/Sema/Sema.h | 5 +- tools/clang/include/clang/Sema/SemaHLSL.h | 3 +- tools/clang/lib/AST/HlslTypes.cpp | 12 +++ tools/clang/lib/AST/Type.cpp | 4 +- tools/clang/lib/AST/TypePrinter.cpp | 3 + tools/clang/lib/CodeGen/CGHLSLMS.cpp | 51 +++++---- .../lib/CodeGen/CGHLSLMSFinishCodeGen.cpp | 9 +- tools/clang/lib/CodeGen/CGHLSLMSHelper.h | 3 +- tools/clang/lib/CodeGen/CGHLSLRuntime.h | 2 +- tools/clang/lib/CodeGen/CGStmt.cpp | 4 +- tools/clang/lib/Parse/ParseDecl.cpp | 3 + tools/clang/lib/Parse/ParseExpr.cpp | 2 + tools/clang/lib/Parse/ParseStmt.cpp | 1 + tools/clang/lib/Parse/ParseTentative.cpp | 1 + tools/clang/lib/Sema/SemaChecking.cpp | 6 +- tools/clang/lib/Sema/SemaDecl.cpp | 7 +- tools/clang/lib/Sema/SemaDeclAttr.cpp | 11 ++ tools/clang/lib/Sema/SemaHLSL.cpp | 82 +++++++++++--- tools/clang/lib/Sema/SemaStmt.cpp | 2 +- tools/clang/lib/Sema/SemaType.cpp | 31 ++++-- .../attributes/reordercoherent_for_arg.hlsl | 19 ++++ .../hlsl/attributes/reordercoherent_uav.hlsl | 17 +++ .../attributes/reordercoherent_uav_array.hlsl | 16 +++ .../attributes/reordercoherent_ast.hlsl | 17 +++ ...dercoherent-globallycoherent-mismatch.hlsl | 96 +++++++++++++++++ .../SemaHLSL/reordercoherent-implied.hlsl | 41 +++++++ .../SemaHLSL/reordercoherent-mismatch.hlsl | 101 ++++++++++++++++++ .../SemaHLSL/reordercoherent-type-errors.hlsl | 26 +++++ .../tools/dxcompiler/dxcdisassembler.cpp | 7 +- .../unittests/HLSL/DxilContainerTest.cpp | 8 +- 48 files changed, 585 insertions(+), 76 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl create mode 100644 tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl create mode 100644 tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl create mode 100644 
tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl create mode 100644 tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl create mode 100644 tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h index 9df155e6e7..e17db016d8 100644 --- a/include/dxc/DXIL/DxilMetadataHelper.h +++ b/include/dxc/DXIL/DxilMetadataHelper.h @@ -233,6 +233,7 @@ class DxilMDHelper { static const unsigned kDxilStructuredBufferElementStrideTag = 1; static const unsigned kDxilSamplerFeedbackKindTag = 2; static const unsigned kDxilAtomic64UseTag = 3; + static const unsigned kDxilReorderCoherentTag = 4; // Type system. static const char kDxilTypeSystemMDName[]; diff --git a/include/dxc/DXIL/DxilResource.h b/include/dxc/DXIL/DxilResource.h index 49db65caed..dcf70333da 100644 --- a/include/dxc/DXIL/DxilResource.h +++ b/include/dxc/DXIL/DxilResource.h @@ -63,6 +63,8 @@ class DxilResource : public DxilResourceBase { bool IsGloballyCoherent() const; void SetGloballyCoherent(bool b); + bool IsReorderCoherent() const; + void SetReorderCoherent(bool b); bool HasCounter() const; void SetHasCounter(bool b); @@ -97,6 +99,7 @@ class DxilResource : public DxilResourceBase { CompType m_CompType; DXIL::SamplerFeedbackType m_SamplerFeedbackType; bool m_bGloballyCoherent; + bool m_bReorderCoherent; bool m_bHasCounter; bool m_bROV; bool m_bHasAtomic64Use; diff --git a/include/dxc/DXIL/DxilResourceProperties.h b/include/dxc/DXIL/DxilResourceProperties.h index 21a705f077..2f4ff58969 100644 --- a/include/dxc/DXIL/DxilResourceProperties.h +++ b/include/dxc/DXIL/DxilResourceProperties.h @@ -47,7 +47,8 @@ struct DxilResourceProperties { uint8_t SamplerCmpOrHasCounter : 1; // BYTE 2 - uint8_t Reserved2; + uint8_t IsReorderCoherent : 1; + uint8_t Reserved2 : 7; // BYTE 3 uint8_t Reserved3; diff --git a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl index 132d272a8e..4b58b406c2 100644 --- a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl +++ b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl @@ -22,6 +22,7 @@ RDAT_ENUM_START(DxilResourceFlag, uint32_t) RDAT_ENUM_VALUE(UAVRasterizerOrderedView, 1 << 2) RDAT_ENUM_VALUE(DynamicIndexing, 1 << 3) RDAT_ENUM_VALUE(Atomics64Use, 1 << 4) + RDAT_ENUM_VALUE(UAVReorderCoherent, 1 << 5) RDAT_ENUM_END() RDAT_ENUM_START(DxilShaderStageFlags, uint32_t) diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp index 19d199ee29..c1282a980a 100644 --- a/lib/DXIL/DxilMetadataHelper.cpp +++ b/lib/DXIL/DxilMetadataHelper.cpp @@ -3110,6 +3110,13 @@ void DxilExtraPropertyHelper::EmitUAVProperties( DxilMDHelper::kDxilAtomic64UseTag, m_Ctx)); MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD((unsigned)true, m_Ctx)); } + // Whether resource is reordercoherent. 
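+  // Encoded as an extended-property pair (kDxilReorderCoherentTag, i1 true).
+  // The pair is only emitted when targeting DXIL 1.9 or later, so metadata
+  // produced for earlier targets is unchanged.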
+ if (DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 9) >= 0 && + UAV.IsReorderCoherent()) { + MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD( + DxilMDHelper::kDxilReorderCoherentTag, m_Ctx)); + MDVals.emplace_back(DxilMDHelper::BoolToConstMD(true, m_Ctx)); + } } void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, @@ -3147,6 +3154,9 @@ void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, case DxilMDHelper::kDxilAtomic64UseTag: UAV.SetHasAtomic64Use(DxilMDHelper::ConstMDToBool(MDO)); break; + case DxilMDHelper::kDxilReorderCoherentTag: + UAV.SetReorderCoherent(DxilMDHelper::ConstMDToBool(MDO)); + break; default: DXASSERT(false, "Unknown resource record tag"); m_bExtraMetadata = true; diff --git a/lib/DXIL/DxilResource.cpp b/lib/DXIL/DxilResource.cpp index 3ab71030bb..0e6f1df877 100644 --- a/lib/DXIL/DxilResource.cpp +++ b/lib/DXIL/DxilResource.cpp @@ -25,8 +25,8 @@ namespace hlsl { DxilResource::DxilResource() : DxilResourceBase(DxilResourceBase::Class::Invalid), m_SampleCount(0), m_ElementStride(0), m_SamplerFeedbackType((DXIL::SamplerFeedbackType)0), - m_bGloballyCoherent(false), m_bHasCounter(false), m_bROV(false), - m_bHasAtomic64Use(false) {} + m_bGloballyCoherent(false), m_bReorderCoherent(false), + m_bHasCounter(false), m_bROV(false), m_bHasAtomic64Use(false) {} CompType DxilResource::GetCompType() const { return m_CompType; } @@ -74,6 +74,10 @@ bool DxilResource::IsGloballyCoherent() const { return m_bGloballyCoherent; } void DxilResource::SetGloballyCoherent(bool b) { m_bGloballyCoherent = b; } +bool DxilResource::IsReorderCoherent() const { return m_bReorderCoherent; } + +void DxilResource::SetReorderCoherent(bool b) { m_bReorderCoherent = b; } + bool DxilResource::HasCounter() const { return m_bHasCounter; } void DxilResource::SetHasCounter(bool b) { m_bHasCounter = b; } diff --git a/lib/DXIL/DxilResourceProperties.cpp b/lib/DXIL/DxilResourceProperties.cpp index 2d1bf95014..54ab24f36e 100644 --- a/lib/DXIL/DxilResourceProperties.cpp +++ b/lib/DXIL/DxilResourceProperties.cpp @@ -190,6 +190,7 @@ DxilResourceProperties loadPropsFromResourceBase(const DxilResourceBase *Res) { RP.Basic.IsUAV = true; RP.Basic.ResourceKind = (uint8_t)Res->GetKind(); RP.Basic.IsGloballyCoherent = UAV->IsGloballyCoherent(); + RP.Basic.IsReorderCoherent = UAV->IsReorderCoherent(); RP.Basic.SamplerCmpOrHasCounter = UAV->HasCounter(); RP.Basic.IsROV = UAV->IsROV(); SetResProperties(*UAV); @@ -234,6 +235,8 @@ DxilResourceProperties tryMergeProps(DxilResourceProperties curProps, prevProps.Basic.IsGloballyCoherent) { curProps.Basic.IsGloballyCoherent = prevProps.Basic.IsGloballyCoherent; } + if (curProps.Basic.IsReorderCoherent != prevProps.Basic.IsReorderCoherent) + curProps.Basic.IsReorderCoherent = prevProps.Basic.IsReorderCoherent; } if (curProps.Basic.ResourceKind == (uint8_t)DXIL::ResourceKind::CBuffer) { diff --git a/lib/DxilContainer/DxilContainerAssembler.cpp b/lib/DxilContainer/DxilContainerAssembler.cpp index f0d7bf6d23..48d8872733 100644 --- a/lib/DxilContainer/DxilContainerAssembler.cpp +++ b/lib/DxilContainer/DxilContainerAssembler.cpp @@ -1057,6 +1057,9 @@ class DxilRDATWriter : public DxilPartWriter { if (pRes->IsGloballyCoherent()) info.Flags |= static_cast(RDAT::DxilResourceFlag::UAVGloballyCoherent); + if (pRes->IsReorderCoherent()) + info.Flags |= + static_cast(RDAT::DxilResourceFlag::UAVReorderCoherent); if (pRes->IsROV()) info.Flags |= static_cast( RDAT::DxilResourceFlag::UAVRasterizerOrderedView); diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp 
b/lib/DxilPIXPasses/PixPassHelpers.cpp index 65d9a660cc..c7c99cf763 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -324,6 +324,7 @@ hlsl::DxilResource *CreateGlobalUAVResource(hlsl::DxilModule &DM, (unsigned int)-2); // This is the reserved-for-tools register space pUAV->SetSampleCount(0); // This is what compiler generates for a raw UAV pUAV->SetGloballyCoherent(false); + pUAV->SetReorderCoherent(false); pUAV->SetHasCounter(false); pUAV->SetCompType( CompType::getInvalid()); // This is what compiler generates for a raw UAV diff --git a/lib/HLSL/DxilCondenseResources.cpp b/lib/HLSL/DxilCondenseResources.cpp index 82d5e14d00..529c203bdc 100644 --- a/lib/HLSL/DxilCondenseResources.cpp +++ b/lib/HLSL/DxilCondenseResources.cpp @@ -2061,7 +2061,8 @@ void DxilLowerCreateHandleForLib::ReplaceResourceUserWithHandle( }; // Search all users for update counter - bool updateAnnotateHandle = res.IsGloballyCoherent(); + bool updateAnnotateHandle = + res.IsGloballyCoherent() || res.IsReorderCoherent(); if (!res.HasCounter()) { for (User *U : handle->users()) { if (IsDxilOp(U, hlsl::OP::OpCode::BufferUpdateCounter)) { @@ -2321,6 +2322,7 @@ void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) { pDest->SetSampleCount(0); pDest->SetElementStride(0); pDest->SetGloballyCoherent(false); + pDest->SetReorderCoherent(false); pDest->SetHasCounter(false); pDest->SetRW(false); pDest->SetROV(false); diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index 7d902a4ed7..c3a6ad7dfc 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -88,6 +88,7 @@ void InitResource(const DxilResource *pSource, DxilResource *pDest) { pDest->SetSampleCount(pSource->GetSampleCount()); pDest->SetElementStride(pSource->GetElementStride()); pDest->SetGloballyCoherent(pSource->IsGloballyCoherent()); + pDest->SetReorderCoherent(pSource->IsReorderCoherent()); pDest->SetHasCounter(pSource->HasCounter()); pDest->SetRW(pSource->IsRW()); pDest->SetROV(pSource->IsROV()); diff --git a/lib/HLSL/DxilPatchShaderRecordBindings.cpp b/lib/HLSL/DxilPatchShaderRecordBindings.cpp index 1873dcbcc4..e07a41a5c0 100644 --- a/lib/HLSL/DxilPatchShaderRecordBindings.cpp +++ b/lib/HLSL/DxilPatchShaderRecordBindings.cpp @@ -341,6 +341,7 @@ unsigned int DxilPatchShaderRecordBindings::AddHandle( if (pHandle) { pHandle->SetGloballyCoherent(false); + pHandle->SetReorderCoherent(false); pHandle->SetHasCounter(false); pHandle->SetCompType(CompType::getF32()); // TODO: Need to handle all types } diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index a67877ef3e..bab6e23a30 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -700,6 +700,7 @@ HLModule::AddResourceWithGlobalVariableAndProps(llvm::Constant *GV, Res->SetRW(true); Res->SetROV(RP.Basic.IsROV); Res->SetGloballyCoherent(RP.Basic.IsGloballyCoherent); + Res->SetReorderCoherent(RP.Basic.IsReorderCoherent); Res->SetHasCounter(RP.Basic.SamplerCmpOrHasCounter); Res->SetKind(RK); Res->SetGlobalSymbol(GV); diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index ab29e4bde7..3a02824b3a 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -470,6 +470,7 @@ bool IsHLSLUnsigned(clang::QualType type); bool IsHLSLMinPrecision(clang::QualType type); bool HasHLSLUNormSNorm(clang::QualType type, bool *pIsSNorm = nullptr); bool HasHLSLGloballyCoherent(clang::QualType type); +bool 
HasHLSLReorderCoherent(clang::QualType type); bool IsHLSLInputPatchType(clang::QualType type); bool IsHLSLOutputPatchType(clang::QualType type); bool IsHLSLPointStreamType(clang::QualType type); diff --git a/tools/clang/include/clang/AST/Type.h b/tools/clang/include/clang/AST/Type.h index f393f88ce9..2c96bbc295 100644 --- a/tools/clang/include/clang/AST/Type.h +++ b/tools/clang/include/clang/AST/Type.h @@ -3652,7 +3652,8 @@ class AttributedType : public Type, public llvm::FoldingSetNode { attr_hlsl_row_major, attr_hlsl_column_major, attr_hlsl_globallycoherent, - // HLSL Change Ends + attr_hlsl_reordercoherent, + // HLSL Change Ends }; private: diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 9c117fb3ce..2518423565 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -854,6 +854,12 @@ def HLSLGloballyCoherent : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLReorderCoherent : InheritableAttr { + let Spellings = [CXX11<"", "reordercoherent", 2015>]; + let Subjects = SubjectList<[Var, Function]>; + let Documentation = [Undocumented]; +} + def HLSLShader : InheritableAttr { let Spellings = [CXX11<"", "shader", 2017>]; let Args = [StringArgument<"stage">]; // one of compute, pixel, vertex, hull, domain, geometry, node diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 4f4dc28a4c..21a1b707c6 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7706,8 +7706,10 @@ def err_hlsl_varmodifierna : Error< "%0 is not a valid modifier for a %1">; def err_hlsl_varmodifierna_decltype : Error< "%0 is not a valid modifier for a declaration of type %1">; -def note_hlsl_globallycoherent_applies_to : Note< - "'globallycoherent' can only be applied to UAV or RWDispatchNodeInputRecord objects">; +def note_hlsl_coherence_applies_to : Note< + "'%select{reordercoherent|globallycoherent}0' can only be applied to UAV%select{| or RWDispatchNodeInputRecord}0 objects">; +def warn_hlsl_gc_implies_rc_attribute : Warning< + "attribute 'reordercoherent' implied by 'globallycoherent' in %0. 
'reordercoherent' ignored.">; def err_hlsl_varmodifiersna : Error< "%0 and %1 cannot be used together for a %2">; def err_hlsl_vla : Error< // Patterened after err_opencl_vla @@ -7756,9 +7758,17 @@ def warn_hlsl_semantic_attribute_position_misuse_hint: Warning< def warn_hlsl_unary_negate_unsigned : Warning< "unary negate of unsigned value is still unsigned">, InGroup, DefaultWarn; -def warn_hlsl_impcast_glc_mismatch : Warning< - "implicit conversion from %0 to %1 %select{loses|adds}2 globallycoherent annotation">, - InGroup, DefaultWarn; +def warn_hlsl_impcast_coherence_mismatch : Warning< + "implicit conversion from %0 to %1 %select{" + "demotes globallycoherent to reordercoherent|" + "promotes reordercoherent to globallycoherent|" + "loses reordercoherent|" + "loses globallycoherent|" + "adds reordercoherent|" + "adds globallycoherent}2 annotation">, + InGroup; +def warn_hlsl_glc_implies_rdc : Warning< + "attribute 'globallycoherent' implies 'reordercoherent'">, InGroup; def warn_hlsl_narrowing : Warning< "conversion from larger type %0 to smaller type %1, possible loss of data">, InGroup, DefaultWarn; diff --git a/tools/clang/include/clang/Basic/TokenKinds.def b/tools/clang/include/clang/Basic/TokenKinds.def index 2267b12b74..6933c965cf 100644 --- a/tools/clang/include/clang/Basic/TokenKinds.def +++ b/tools/clang/include/clang/Basic/TokenKinds.def @@ -508,6 +508,7 @@ KEYWORD(lineadj , KEYHLSL) KEYWORD(triangle , KEYHLSL) KEYWORD(triangleadj , KEYHLSL) KEYWORD(globallycoherent , KEYHLSL) +KEYWORD(reordercoherent , KEYHLSL) KEYWORD(interface , KEYHLSL) KEYWORD(sampler_state , KEYHLSL) KEYWORD(technique , KEYHLSL) diff --git a/tools/clang/include/clang/Sema/Sema.h b/tools/clang/include/clang/Sema/Sema.h index 42ab80b617..755c7e0755 100644 --- a/tools/clang/include/clang/Sema/Sema.h +++ b/tools/clang/include/clang/Sema/Sema.h @@ -3804,9 +3804,8 @@ class Sema { bool CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation Loc, UnaryExprOrTypeTrait ExprKind); void DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A); - void DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc); + void DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc); void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto); void DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index ac6e08b3fa..59d99ab4c5 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -203,7 +203,8 @@ void Indent(unsigned int Indentation, llvm::raw_ostream &Out); void GetHLSLAttributedTypes(clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, const clang::AttributedType **ppNorm, - const clang::AttributedType **ppGLC); + const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC); bool IsMatrixType(clang::Sema *self, clang::QualType type); bool IsVectorType(clang::Sema *self, clang::QualType type); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index d853125954..5b19e064a3 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -278,6 +278,18 @@ bool HasHLSLGloballyCoherent(clang::QualType type) { return false; } +bool HasHLSLReorderCoherent(clang::QualType type) { + const AttributedType *AT = type->getAs(); + while (AT) { + 
AttributedType::Kind kind = AT->getAttrKind(); + if (kind == AttributedType::attr_hlsl_reordercoherent) + return true; + AT = AT->getLocallyUnqualifiedSingleStepDesugaredType() + ->getAs(); + } + return false; +} + /// Checks whether the pAttributes indicate a parameter is inout or out; if /// inout, pIsIn will be set to true. bool IsParamAttributedAsOut(clang::AttributeList *pAttributes, bool *pIsIn); diff --git a/tools/clang/lib/AST/Type.cpp b/tools/clang/lib/AST/Type.cpp index 06db4747ff..51c20218cc 100644 --- a/tools/clang/lib/AST/Type.cpp +++ b/tools/clang/lib/AST/Type.cpp @@ -2945,6 +2945,7 @@ bool AttributedType::isHLSLTypeSpec() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: + case attr_hlsl_reordercoherent: return true; } llvm_unreachable("invalid attr kind"); @@ -2975,7 +2976,8 @@ bool AttributedType::isCallingConv() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: - // HLSL Change Ends + case attr_hlsl_reordercoherent: + // HLSL Change Ends return false; case attr_pcs: diff --git a/tools/clang/lib/AST/TypePrinter.cpp b/tools/clang/lib/AST/TypePrinter.cpp index 621e1d46a0..ca9e15bfd7 100644 --- a/tools/clang/lib/AST/TypePrinter.cpp +++ b/tools/clang/lib/AST/TypePrinter.cpp @@ -1174,6 +1174,9 @@ void TypePrinter::printAttributedBefore(const AttributedType *T, case AttributedType::attr_hlsl_globallycoherent: OS << "globallycoherent "; break; + case AttributedType::attr_hlsl_reordercoherent: + OS << "reordercoherent "; + break; default: // Only HLSL attribute types are covered. break; diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index b041db95a7..16ddeaec60 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -300,7 +300,7 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { clang::QualType QaulTy) override; void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) override; - const clang::Expr *CheckReturnStmtGLCMismatch( + const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) @@ -2803,16 +2803,20 @@ void CGMSHLSLRuntime::MarkPotentialResourceTemp(CodeGenFunction &CGF, AddValToPropertyMap(V, QualTy); } -static bool isGLCMismatch(QualType Ty0, QualType Ty1, const Expr *SrcExp, - clang::SourceLocation Loc, DiagnosticsEngine &Diags) { - if (HasHLSLGloballyCoherent(Ty0) == HasHLSLGloballyCoherent(Ty1)) - return false; +static std::pair getCoherenceMismatch(QualType Ty0, QualType Ty1, + const Expr *SrcExp) { + std::pair Mismatch{ + HasHLSLGloballyCoherent(Ty0) != HasHLSLGloballyCoherent(Ty1), + HasHLSLReorderCoherent(Ty0) != HasHLSLReorderCoherent(Ty1)}; + if (!Mismatch.first && !Mismatch.second) + return {false, false}; + if (const CastExpr *Cast = dyn_cast(SrcExp)) { // Skip flat conversion which is for createHandleFromHeap. 
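// Such sources come from the dynamic-resource placeholder type, where the
// coherence annotations are inferred from the destination, so reporting a
// mismatch here would be noise.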
if (Cast->getCastKind() == CastKind::CK_FlatConversion) - return false; + return {false, false}; } - return true; + return Mismatch; } void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, @@ -2829,19 +2833,23 @@ void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, AddValToPropertyMap(V, D.getType()); if (D.hasInit()) { - if (isGLCMismatch(D.getType(), D.getInit()->getType(), D.getInit(), - D.getLocation(), CGM.getDiags())) { - objectProperties.updateGLC(V); + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(D.getType(), D.getInit()->getType(), D.getInit()); + + if (glcMismatch || rdcMismatch) { + objectProperties.updateCoherence(V, glcMismatch, rdcMismatch); } } } -const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtGLCMismatch( +const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) { - if (!isGLCMismatch(RV->getType(), FnRetTy, RV, S.getReturnLoc(), - CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(RV->getType(), FnRetTy, RV); + + if (!glcMismatch && !rdcMismatch) { return RV; } const FunctionDecl *FD = cast(CGF.CurFuncDecl); @@ -2913,10 +2921,11 @@ void CGMSHLSLRuntime::addResource(Decl *D) { if (VD->hasInit() && resClass != DXIL::ResourceClass::Invalid) { if (resClass == DXIL::ResourceClass::UAV) { - if (isGLCMismatch(VD->getType(), VD->getInit()->getType(), - VD->getInit(), D->getLocation(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = getCoherenceMismatch( + VD->getType(), VD->getInit()->getType(), VD->getInit()); + if (glcMismatch || rdcMismatch) { GlobalVariable *GV = cast(CGM.GetAddrOfGlobalVar(VD)); - objectProperties.updateGLC(GV); + objectProperties.updateCoherence(GV, glcMismatch, rdcMismatch); } } return; @@ -3463,8 +3472,11 @@ bool CGMSHLSLRuntime::SetUAVSRV(SourceLocation loc, } } } + // 'globallycoherent' implies 'reordercoherent' if (HasHLSLGloballyCoherent(QualTy)) { hlslRes->SetGloballyCoherent(true); + } else if (HasHLSLReorderCoherent(QualTy)) { + hlslRes->SetReorderCoherent(true); } if (resClass == hlsl::DxilResourceBase::Class::SRV) { hlslRes->SetRW(false); @@ -3497,6 +3509,8 @@ uint32_t CGMSHLSLRuntime::AddUAVSRV(VarDecl *decl, if (decl->hasAttr()) { hlslRes->SetGloballyCoherent(true); } + if (decl->hasAttr()) + hlslRes->SetReorderCoherent(true); if (!SetUAVSRV(decl->getLocation(), resClass, hlslRes.get(), VarTy)) return 0; @@ -6140,8 +6154,9 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy)); bool bAnnotResource = false; if (isObject) { - if (isGLCMismatch(Param->getType(), Arg->getType(), Arg, - Arg->getExprLoc(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(Param->getType(), Arg->getType(), Arg); + if (glcMismatch || rdcMismatch) { // NOTE: if function is noinline, resource parameter is not allowed. // Here assume function will be always inlined. // This can only take care resource as parameter. 
When parameter is diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 532ec01458..13edadf9df 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -4034,12 +4034,17 @@ hlsl::DxilResourceProperties DxilObjectProperties::GetResource(llvm::Value *V) { return it->second; return DxilResourceProperties(); } -void DxilObjectProperties::updateGLC(llvm::Value *V) { +void DxilObjectProperties::updateCoherence(llvm::Value *V, + bool updateGloballyCoherent, + bool updateReorderCoherent) { auto it = resMap.find(V); if (it == resMap.end()) return; - it->second.Basic.IsGloballyCoherent ^= 1; + if (updateGloballyCoherent) + it->second.Basic.IsGloballyCoherent ^= 1; + if (updateReorderCoherent) + it->second.Basic.IsReorderCoherent ^= 1; } } // namespace CGHLSLMSHelper diff --git a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h index 9058ed4f6d..7fca5d4025 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h +++ b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h @@ -159,7 +159,8 @@ struct DxilObjectProperties { bool AddResource(llvm::Value *V, const hlsl::DxilResourceProperties &RP); bool IsResource(llvm::Value *V); hlsl::DxilResourceProperties GetResource(llvm::Value *V); - void updateGLC(llvm::Value *V); + void updateCoherence(llvm::Value *V, bool updateGloballyCoherent, + bool updateReorderCoherent); // MapVector for deterministic iteration order. llvm::MapVector resMap; diff --git a/tools/clang/lib/CodeGen/CGHLSLRuntime.h b/tools/clang/lib/CodeGen/CGHLSLRuntime.h index 3e27951e86..b100d93579 100644 --- a/tools/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/tools/clang/lib/CodeGen/CGHLSLRuntime.h @@ -146,7 +146,7 @@ class CGHLSLRuntime { virtual void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) = 0; - virtual const clang::Expr *CheckReturnStmtGLCMismatch( + virtual const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const clang::Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) = 0; diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 340550dbdd..1b1f593271 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -1178,8 +1178,8 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { auto MapTemp = [&](const VarDecl *LocalVD, llvm::Value *TmpArg) { OutParamScope.addTemp(LocalVD, TmpArg); }; - RV = CGM.getHLSLRuntime().CheckReturnStmtGLCMismatch(*this, RV, S, - FnRetTy, MapTemp); + RV = CGM.getHLSLRuntime().CheckReturnStmtCoherenceMismatch( + *this, RV, S, FnRetTy, MapTemp); // HLSL Change Ends. 
CharUnits Alignment = getContext().getTypeAlignInChars(RV->getType()); EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, Alignment, diff --git a/tools/clang/lib/Parse/ParseDecl.cpp b/tools/clang/lib/Parse/ParseDecl.cpp index 4ca80fcec6..59be41a484 100644 --- a/tools/clang/lib/Parse/ParseDecl.cpp +++ b/tools/clang/lib/Parse/ParseDecl.cpp @@ -3877,6 +3877,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -5321,6 +5322,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_shared: case tok::kw_groupshared: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_uniform: case tok::kw_in: case tok::kw_out: @@ -6125,6 +6127,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) { switch (Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseExpr.cpp b/tools/clang/lib/Parse/ParseExpr.cpp index 745b506468..8f51dd4b6c 100644 --- a/tools/clang/lib/Parse/ParseExpr.cpp +++ b/tools/clang/lib/Parse/ParseExpr.cpp @@ -795,6 +795,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -1740,6 +1741,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { switch (auto tk = Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseStmt.cpp b/tools/clang/lib/Parse/ParseStmt.cpp index 95dea4ab2c..6fa33d7108 100644 --- a/tools/clang/lib/Parse/ParseStmt.cpp +++ b/tools/clang/lib/Parse/ParseStmt.cpp @@ -179,6 +179,7 @@ Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: diff --git a/tools/clang/lib/Parse/ParseTentative.cpp b/tools/clang/lib/Parse/ParseTentative.cpp index 29c6e49770..6bdef3a547 100644 --- a/tools/clang/lib/Parse/ParseTentative.cpp +++ b/tools/clang/lib/Parse/ParseTentative.cpp @@ -1275,6 +1275,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw_precise: case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_indices: case tok::kw_vertices: case tok::kw_primitives: diff --git a/tools/clang/lib/Sema/SemaChecking.cpp b/tools/clang/lib/Sema/SemaChecking.cpp index 2fde458499..9e64732336 100644 --- a/tools/clang/lib/Sema/SemaChecking.cpp +++ b/tools/clang/lib/Sema/SemaChecking.cpp @@ -6772,8 +6772,8 @@ static void AnalyzeAssignment(Sema &S, BinaryOperator *E) { // Just recurse on the LHS. AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc()); - S.DiagnoseGloballyCoherentMismatch(E->getRHS(), E->getLHS()->getType(), - E->getOperatorLoc()); + S.DiagnoseCoherenceMismatch(E->getRHS(), E->getLHS()->getType(), + E->getOperatorLoc()); // We want to recurse on the RHS as normal unless we're assigning to // a bitfield. 
@@ -6887,7 +6887,7 @@ void CheckImplicitArgumentConversions(Sema &S, CallExpr *TheCall, ++ArgIdx, ++ParmIdx) { ParmVarDecl *PD = FD->getParamDecl(ParmIdx); Expr *CurrA = TheCall->getArg(ArgIdx); - S.DiagnoseGloballyCoherentMismatch(CurrA, PD->getType(), CC); + S.DiagnoseCoherenceMismatch(CurrA, PD->getType(), CC); } } // HLSL CHange End diff --git a/tools/clang/lib/Sema/SemaDecl.cpp b/tools/clang/lib/Sema/SemaDecl.cpp index 06bdeb491a..e09bf4623c 100644 --- a/tools/clang/lib/Sema/SemaDecl.cpp +++ b/tools/clang/lib/Sema/SemaDecl.cpp @@ -9167,9 +9167,10 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, // HLSL Change begin // When initializing an HLSL resource type we should diagnose mismatches in - // globally coherent annotations _unless_ the source is a dynamic resource - // placeholder type where we safely infer the globallycoherent annotaiton. - DiagnoseGloballyCoherentMismatch(Init, DclT, Init->getExprLoc()); + // globally and reorder coherent annotations _unless_ the source is a dynamic + // resource placeholder type where we safely infer the coherence + // annotations. + DiagnoseCoherenceMismatch(Init, DclT, Init->getExprLoc()); // HLSL Change end // Expressions default to 'id' when we're in a debugger diff --git a/tools/clang/lib/Sema/SemaDeclAttr.cpp b/tools/clang/lib/Sema/SemaDeclAttr.cpp index 723900cd07..085874a0ed 100644 --- a/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -5105,6 +5105,17 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D, for (const AttributeList* l = AttrList; l; l = l->getNext()) ProcessDeclAttribute(*this, S, D, *l, IncludeCXX11Attributes); + // HLSL Change Starts - Warn of redundant reorder / globally coherent + // attributes + if (D->hasAttr() && + D->hasAttr()) { + Diag(AttrList->getLoc(), diag::warn_hlsl_gc_implies_rc_attribute) + << cast(D); + D->dropAttr(); + return; + } + // HLSL Change Ends + // FIXME: We should be able to handle these cases in TableGen. 
// GCC accepts // static int a9 __attribute__((weakref)); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 72dd6d41aa..2bd4462f2f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -13674,8 +13674,9 @@ ValidateMaxRecordsSharedWithAttributes(Sema &S, Decl *D, void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { HLSLExternalSource *ExtSource = HLSLExternalSource::FromSema(this); - if (const HLSLGloballyCoherentAttr *HLSLGCAttr = - dyn_cast(A)) { + const bool IsGCAttr = isa(A); + const bool IsRCAttr = isa(A); + if (IsGCAttr || IsRCAttr) { const ValueDecl *TD = cast(D); if (TD->getType()->isDependentType()) return; @@ -13684,23 +13685,25 @@ void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { DeclType = FD->getReturnType(); while (DeclType->isArrayType()) DeclType = QualType(DeclType->getArrayElementTypeNoTypeQual(), 0); + const bool IsAllowedNodeIO = + IsGCAttr && + GetNodeIOType(DeclType) == DXIL::NodeIOKind::RWDispatchNodeInputRecord; + const bool IsUAV = + hlsl::GetResourceClassForType(getASTContext(), DeclType) == + hlsl::DXIL::ResourceClass::UAV; if (ExtSource->GetTypeObjectKind(DeclType) != AR_TOBJ_OBJECT || - (hlsl::GetResourceClassForType(getASTContext(), DeclType) != - hlsl::DXIL::ResourceClass::UAV && - GetNodeIOType(DeclType) != - DXIL::NodeIOKind::RWDispatchNodeInputRecord)) { + (!IsUAV && !IsAllowedNodeIO)) { Diag(A->getLocation(), diag::err_hlsl_varmodifierna_decltype) << A << DeclType->getCanonicalTypeUnqualified() << A->getRange(); - Diag(A->getLocation(), diag::note_hlsl_globallycoherent_applies_to) - << A << A->getRange(); + Diag(A->getLocation(), diag::note_hlsl_coherence_applies_to) + << (int)IsGCAttr << A << A->getRange(); } return; } } -void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc) { +void Sema::DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc) { QualType SrcTy = SrcExpr->getType(); QualType DstTy = TargetType; if (SrcTy->isArrayType() && DstTy->isArrayType()) { @@ -13712,9 +13715,39 @@ void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, GetNodeIOType(DstTy) == DXIL::NodeIOKind::RWDispatchNodeInputRecord) { bool SrcGL = hlsl::HasHLSLGloballyCoherent(SrcTy); bool DstGL = hlsl::HasHLSLGloballyCoherent(DstTy); - if (SrcGL != DstGL) - Diag(Loc, diag::warn_hlsl_impcast_glc_mismatch) - << SrcExpr->getType() << TargetType << /*loses|adds*/ DstGL; + // 'reordercoherent' attribute dropped earlier in presence of + // 'globallycoherent' + bool SrcRD = hlsl::HasHLSLReorderCoherent(SrcTy); + bool DstRD = hlsl::HasHLSLReorderCoherent(DstTy); + + enum { + NoMismatch = -1, + DemoteToRD = 0, + PromoteToGL = 1, + LosesRD = 2, + LosesGL = 3, + AddsRD = 4, + AddsGL = 5 + } MismatchType = NoMismatch; + + if (SrcGL && DstRD) + MismatchType = DemoteToRD; + else if (SrcRD && DstGL) + MismatchType = PromoteToGL; + else if (SrcRD && !DstRD) + MismatchType = LosesRD; + else if (SrcGL && !DstGL) + MismatchType = LosesGL; + else if (!SrcRD && DstRD) + MismatchType = AddsRD; + else if (!SrcGL && DstGL) + MismatchType = AddsGL; + + if (MismatchType == NoMismatch) + return; + + Diag(Loc, diag::warn_hlsl_impcast_coherence_mismatch) + << SrcExpr->getType() << TargetType << MismatchType; } } @@ -13863,6 +13896,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, declAttr = ::new (S.Context) HLSLGloballyCoherentAttr( A.getRange(), S.Context, 
A.getAttributeSpellingListIndex()); break; + case AttributeList::AT_HLSLReorderCoherent: + declAttr = ::new (S.Context) HLSLReorderCoherentAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; case AttributeList::AT_HLSLIndices: declAttr = ::new (S.Context) HLSLIndicesAttr( A.getRange(), S.Context, A.getAttributeSpellingListIndex()); @@ -14927,6 +14964,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGloballyCoherent: // Handled elsewhere + case AttributeList::AT_HLSLReorderCoherent: // Handled elsewhere break; case AttributeList::AT_HLSLUniform: if (!(isGlobal || isParameter)) { @@ -15322,15 +15360,17 @@ static QualType getUnderlyingType(QualType Type) { void hlsl::GetHLSLAttributedTypes( clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, - const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC) { + const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC) { AssignOpt(nullptr, ppMatrixOrientation); AssignOpt(nullptr, ppNorm); AssignOpt(nullptr, ppGLC); + AssignOpt(nullptr, ppRDC); // Note: we clear output pointers once set so we can stop searching QualType Desugared = getUnderlyingType(type); const AttributedType *AT = dyn_cast(Desugared); - while (AT && (ppMatrixOrientation || ppNorm || ppGLC)) { + while (AT && (ppMatrixOrientation || ppNorm || ppGLC || ppRDC)) { AttributedType::Kind Kind = AT->getAttrKind(); if (Kind == AttributedType::attr_hlsl_row_major || @@ -15350,6 +15390,11 @@ void hlsl::GetHLSLAttributedTypes( *ppGLC = AT; ppGLC = nullptr; } + } else if (Kind == AttributedType::attr_hlsl_reordercoherent) { + if (ppRDC) { + *ppRDC = AT; + ppRDC = nullptr; + } } Desugared = getUnderlyingType(AT->getEquivalentType()); @@ -15734,6 +15779,10 @@ void hlsl::CustomPrintHLSLAttr(const clang::Attr *A, llvm::raw_ostream &Out, Out << "globallycoherent "; break; + case clang::attr::HLSLReorderCoherent: + Out << "reordercoherent "; + break; + case clang::attr::HLSLIndices: Out << "indices "; break; @@ -15941,6 +15990,7 @@ bool hlsl::IsHLSLAttr(clang::attr::Kind AttrKind) { case clang::attr::HLSLNodeLocalRootArgumentsTableIndex: case clang::attr::HLSLNodeShareInputOf: case clang::attr::HLSLNodeTrackRWInputSharing: + case clang::attr::HLSLReorderCoherent: case clang::attr::VKBinding: case clang::attr::VKBuiltIn: case clang::attr::VKConstantId: diff --git a/tools/clang/lib/Sema/SemaStmt.cpp b/tools/clang/lib/Sema/SemaStmt.cpp index ce1e55bb0e..4e47a68888 100644 --- a/tools/clang/lib/Sema/SemaStmt.cpp +++ b/tools/clang/lib/Sema/SemaStmt.cpp @@ -3184,7 +3184,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // HLSL Change begin - Diagnose mismatched globallycoherent attrs on return. 
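// DiagnoseCoherenceMismatch checks both the globallycoherent and the
// reordercoherent annotation on the returned value against the declared
// return type, e.g. returning a 'reordercoherent RWByteAddressBuffer' from a
// function declared to return a plain RWByteAddressBuffer warns that the
// annotation is lost.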
if (RetValExp) - DiagnoseGloballyCoherentMismatch(RetValExp, FnRetType, ReturnLoc); + DiagnoseCoherenceMismatch(RetValExp, FnRetType, ReturnLoc); // HLSL Change end bool HasDependentReturnType = FnRetType->isDependentType(); diff --git a/tools/clang/lib/Sema/SemaType.cpp b/tools/clang/lib/Sema/SemaType.cpp index 5a8f9d13b3..ff3b0dbac7 100644 --- a/tools/clang/lib/Sema/SemaType.cpp +++ b/tools/clang/lib/Sema/SemaType.cpp @@ -4528,7 +4528,9 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) { return AttributeList::AT_HLSLColumnMajor; case AttributedType::attr_hlsl_globallycoherent: return AttributeList::AT_HLSLGloballyCoherent; - // HLSL Change Ends + case AttributedType::attr_hlsl_reordercoherent: + return AttributeList::AT_HLSLReorderCoherent; + // HLSL Change Ends } llvm_unreachable("unexpected attribute kind!"); } @@ -5771,6 +5773,7 @@ static bool isHLSLTypeAttr(AttributeList::Kind Kind) { case AttributeList::AT_HLSLSnorm: case AttributeList::AT_HLSLUnorm: case AttributeList::AT_HLSLGloballyCoherent: + case AttributeList::AT_HLSLReorderCoherent: return true; default: // Only meant to catch attr handled by handleHLSLTypeAttr, ignore the rest @@ -5802,7 +5805,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, const AttributedType *pMatrixOrientation = nullptr; const AttributedType *pNorm = nullptr; const AttributedType *pGLC = nullptr; - hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC); + const AttributedType *pRDC = nullptr; + hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC, + &pRDC); if (pMatrixOrientation && (Kind == AttributeList::AT_HLSLColumnMajor || @@ -5836,13 +5841,18 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, return true; } - if (pGLC && Kind == AttributeList::AT_HLSLGloballyCoherent) { - AttributedType::Kind CurAttrKind = pGLC->getAttrKind(); - if (Kind == getAttrListKind(CurAttrKind)) { - S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) - << Attr.getName() << Attr.getRange(); - } - } + const bool hasGLC = pGLC; + const bool addsGLC = Kind == AttributeList::AT_HLSLGloballyCoherent; + const bool hasRDC = pRDC; + const bool addsRDC = Kind == AttributeList::AT_HLSLReorderCoherent; + + const bool hasMismatchingAttrs = hasGLC && hasRDC; + const bool addsMismatchingAttr = (hasGLC && addsRDC) || (hasRDC && addsGLC); + if ((hasGLC && addsGLC) || (hasRDC && addsRDC)) + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) + << Attr.getName() << Attr.getRange(); + else if (!hasMismatchingAttrs && addsMismatchingAttr) + S.Diag(Attr.getLoc(), diag::warn_hlsl_glc_implies_rdc) << Attr.getRange(); AttributedType::Kind TAK; switch (Kind) { @@ -5853,6 +5863,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, case AttributeList::AT_HLSLSnorm: TAK = AttributedType::attr_hlsl_snorm; break; case AttributeList::AT_HLSLGloballyCoherent: TAK = AttributedType::attr_hlsl_globallycoherent; break; + case AttributeList::AT_HLSLReorderCoherent: + TAK = AttributedType::attr_hlsl_reordercoherent; + break; } Type = S.Context.getAttributedType(TAK, Type, Type); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl new file mode 100644 index 0000000000..d92ce7b9ca --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: %[[uH:[^ ]+]] = 
load %dx.types.Handle, %dx.types.Handle* @"\01?u@@3V?$RWBuffer@M@@A", align 4 +// CHECK: %[[uLIBH:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %[[uH]]) ; CreateHandleForLib(Resource) +// CHECK: %[[uANNOT:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[uLIBH]], %dx.types.ResourceProperties { i32 69642, i32 265 }) ; AnnotateHandle(res,props) resource: reordercoherent RWTypedBuffer +// CHECK: %{{[^ ]+}} = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %[[uANNOT]], i32 0, i32 undef) ; BufferLoad(srv,index,wot) + +RWBuffer<float> OutBuf : register(u1); +reordercoherent RWBuffer<float> u : register(u2); + +float read(RWBuffer<float> buf) { + return buf[0]; +} + +[shader("raygeneration")] +void main() { + OutBuf[0] = read(u); +}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl new file mode 100644 index 0000000000..ea47281d0d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: !"uav1", {{.+}}, ![[TAGMD:[0-9]+]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true + +reordercoherent RWTexture1D<float> uav1 : register(u3); +RWBuffer<float> uav2; + +[shader("raygeneration")] +void main() +{ + reordercoherent RWTexture1D<float> uav3 = uav1; + uav3[0] = 5; + uav1[0] = 2; + uav2[1] = 3; +}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl new file mode 100644 index 0000000000..8b60c0cd67 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// Make sure uav array can have reordercoherent.
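+// The array binds as a single resource record, so one extended-property node
+// carrying the reordercoherent tag (i32 4, i1 true) is expected alongside the
+// element-type tag checked below.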
+// CHECK: !{{.*}} = !{i32 1, [12 x %"class.RWTexture2D<float>"]* bitcast ([12 x %dx.types.Handle]* @"\01?tex@@3PAV?$RWTexture2D@M@@A" to [12 x %"class.RWTexture2D<float>"]*), !"tex", i32 0, i32 2, i32 12, i32 2, i1 false, i1 false, i1 false, ![[TAGMD:.*]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true} + + +RWBuffer<float> OutBuf: register(u1); +reordercoherent RWTexture2D<float> tex[12] : register(u2); + +[shader("raygeneration")] +void main() { + int2 c = DispatchRaysIndex().xy; + OutBuf[0] = tex[0][c]; +}
diff --git a/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl new file mode 100644 index 0000000000..53366de828 --- /dev/null +++ b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: |-VarDecl {{.*}} used uav1 'reordercoherent RWTexture1D<float>':'RWTexture1D<vector<float, 1> >' +// CHECK-NEXT: | |-HLSLReorderCoherentAttr +reordercoherent RWTexture1D<float> uav1 : register(u3); +RWBuffer<float> uav2; + +[shader("raygeneration")] +void main() +{ + // CHECK: | `-VarDecl {{.*}} uav3 'reordercoherent RWTexture1D<float>':'RWTexture1D<vector<float, 1> >' cinit + // CHECK-NEXT: | | + // CHECK-NEXT: | | + // CHECK-NEXT: | `-HLSLReorderCoherentAttr + reordercoherent RWTexture1D<float> uav3 = uav1; +}
diff --git a/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl new file mode 100644 index 0000000000..0192154b78 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl @@ -0,0 +1,96 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonCBuf; +globallycoherent RWByteAddressBuffer GCBuf; +reordercoherent RWByteAddressBuffer RCBuf; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' implied by 'globallycoherent' in 'RCGCBuf'. 
'reordercoherent' ignored.}} +reordercoherent globallycoherent RWByteAddressBuffer RCGCBuf; + +globallycoherent RWByteAddressBuffer getPromoteRC() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGC() { + return GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer GCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +reordercoherent RWByteAddressBuffer RCBufMultiArr[2][2]; +globallycoherent RWByteAddressBuffer GCBufMultiArr[2][2]; + +globallycoherent RWByteAddressBuffer getPromoteRCArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCArr() { + return GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getPromoteRCMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCMultiArr() { + return GCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void RCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(globallycoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getPromoteToGCParam(inout globallycoherent RWByteAddressBuffer PGCBuf) { + PGCBuf = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer __restrict' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParam(inout reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' demotes globallycoherent to reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCDemoteBufArr[2] = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr0[2] = GCBufMultiArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr1[2][2] = GCBufMultiArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' demotes globallycoherent to 
reordercoherent annotation}} + +static globallycoherent RWByteAddressBuffer SRCPromoteBufArr[2] = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr0[2] = RCBufMultiArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr1[2][2] = RCBufMultiArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2][2]' to 'globallycoherent RWByteAddressBuffer [2][2]' promotes reordercoherent to globallycoherent annotation}} + +void getPromoteToGCParamArr(inout globallycoherent RWByteAddressBuffer PGCBufArr[2]) { + PGCBufArr = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer __restrict[2]' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParamArr(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getGCBuf() { + return GCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +[shader("raygeneration")] +void main() +{ + GCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + RCStore(GCBuf); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGCReturn = getGCBuf(); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC0 = GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to 
globallycoherent annotation}} +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl new file mode 100644 index 0000000000..130b0efee7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -E main -T lib_6_9 -verify %s +// REQUIRES: dxil-1-9 + +using Ty = RWTexture1D; + +using GTy = globallycoherent Ty; +using RTy = reordercoherent Ty; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGTy = globallycoherent GTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRTy = reordercoherent RTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy = globallycoherent RTy; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using RGTy = reordercoherent GTy; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy = globallycoherent GRTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy = reordercoherent RGTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy2 = globallycoherent reordercoherent Ty; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using RGTy2 = reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy2 = globallycoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GRGTy2 = globallycoherent reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RGRTy2 = reordercoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy2 = reordercoherent reordercoherent globallycoherent Ty; diff --git a/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl new file mode 100644 index 0000000000..447e496c6e --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonRCBuf; +reordercoherent RWByteAddressBuffer RCBuf; + +RWByteAddressBuffer NonRCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +RWByteAddressBuffer NonRCBufMultiArr[2][2]; +reordercoherent RWByteAddressBuffer RCBufMultiArr[2][2]; + +RWByteAddressBuffer getNonRCBuf() { + return NonRCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +RWByteAddressBuffer getNonRCBufArr() { + return NonRCBufArr[0]; +} + +reordercoherent RWByteAddressBuffer getRCBufArr() { + return RCBufArr[0]; +} + +RWByteAddressBuffer getNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; +} + +reordercoherent RWByteAddressBuffer getRCBufMultiArr() { + return RCBufMultiArr[0][0]; +} + +RWByteAddressBuffer getNonGCRCBuf() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent 
annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBuf() { + return NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufArr() { + return NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getNonRCBufPAram(inout reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' adds reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCBufArr[2] = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr0[2] = NonRCBufMultiArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr1[2][2] = NonRCBufMultiArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' adds reordercoherent annotation}} + +void getNonRCBufArrParam(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' adds reordercoherent annotation}} +} + +[shader("raygeneration")] void main() { + NonGCStore(NonRCBuf); // No diagnostic + GCStore(NonRCBuf); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + NonGCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + GCStore(RCBuf); // No diagnostic + + RWByteAddressBuffer NonGCCopyNonGC = NonRCBuf; // No diagnostic + RWByteAddressBuffer NonGCCopyGC = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC = NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC = RCBuf; // No diagnostic + + reordercoherent 
RWByteAddressBuffer GCCopyNonGCReturn = getNonRCBuf(); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyGCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyNonGC0 = NonRCBufArr[0]; // No diagnostic + RWByteAddressBuffer NonGCCopyGC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC0 = NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC0 = RCBufArr[0]; // No diagnostic +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl new file mode 100644 index 0000000000..57fd33fb13 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +reordercoherent RWTexture1D uav1 : register(u3); + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'Buffer >'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent Buffer srv; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float m; + +reordercoherent RWTexture2D tex[12]; +reordercoherent RWTexture2D texMD[12][12]; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float One() { + return 1.0; +} + +struct Record { uint index; }; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'RWDispatchNodeInputRecord'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +void func2(reordercoherent RWDispatchNodeInputRecord funcInputData) {} diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp index 3af305d52a..16d8b1dadd 100644 --- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp @@ -1220,6 +1220,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, bool bUAV = RP.isUAV(); LPCSTR RW = bUAV ? (RP.Basic.IsROV ? "ROV" : "RW") : ""; LPCSTR GC = bUAV && RP.Basic.IsGloballyCoherent ? "globallycoherent " : ""; + LPCSTR RC = bUAV && RP.Basic.IsReorderCoherent ? "reordercoherent " : ""; LPCSTR COUNTER = bUAV && RP.Basic.SamplerCmpOrHasCounter ? 
", counter" : ""; switch (RP.getResourceKind()) { @@ -1233,7 +1234,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << "<"; if (RP.Typed.CompCount > 1) OS << std::to_string(RP.Typed.CompCount) << "x"; @@ -1241,11 +1242,11 @@ void PrintResourceProperties(DxilResourceProperties &RP, break; case DXIL::ResourceKind::RawBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); break; case DXIL::ResourceKind::StructuredBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << ""; break; diff --git a/tools/clang/unittests/HLSL/DxilContainerTest.cpp b/tools/clang/unittests/HLSL/DxilContainerTest.cpp index a1533ae19f..339b33c655 100644 --- a/tools/clang/unittests/HLSL/DxilContainerTest.cpp +++ b/tools/clang/unittests/HLSL/DxilContainerTest.cpp @@ -1454,6 +1454,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { "ConsumeStructuredBuffer consume_buf;" "RasterizerOrderedByteAddressBuffer rov_buf;" "globallycoherent RWByteAddressBuffer gc_buf;" + "reordercoherent RWByteAddressBuffer rc_buf;" "float function_import(float x);" "export float function0(min16float x) { " " return x + 1 + tex[0].x; }" @@ -1465,6 +1466,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { " f.f2 += 0.5; append_buf.Append(f);" " rov_buf.Store(i, f.i2.x);" " gc_buf.Store(i, f.i2.y);" + " rc_buf.Store(i, f.i2.y);" " b_buf.Store(i, f.i2.x + f.i2.y); }"; CComPtr pCompiler; CComPtr pSource; @@ -1477,7 +1479,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::DXIL::ResourceKind kind; hlsl::RDAT::DxilResourceFlag flag; }; - const unsigned numResFlagCheck = 5; + const unsigned numResFlagCheck = 6; CheckResFlagInfo resFlags[numResFlagCheck] = { {"b_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::None}, @@ -1487,6 +1489,8 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::RDAT::DxilResourceFlag::UAVCounter}, {"gc_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVGloballyCoherent}, + {"rc_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVReorderCoherent}, {"rov_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVRasterizerOrderedView}}; @@ -1575,7 +1579,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { IFTBOOLMSG(false, E_FAIL, "unknown function name"); } } - VERIFY_ARE_EQUAL(resTable.Count(), 8U); + VERIFY_ARE_EQUAL(resTable.Count(), 9U); } } IFTBOOLMSG(blobFound, E_FAIL, "failed to find RDAT blob after compiling"); From bc9044adc7356896eeb1f37a3846f4fef8ed241e Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 10 Apr 2025 18:21:30 +0200 Subject: [PATCH 77/88] [SER] REORDER_SCOPE Barrier semantic flag (#7263) - HLSL REORDER_SCOPE flag (available from SM6.9) - Make validator accept REORDER_SCOPE from DXIL 1.9 --- include/dxc/DXIL/DxilConstants.h | 4 +- include/dxc/DXIL/DxilOperations.h | 1 + lib/DXIL/DxilOperations.cpp | 40 ++++++++++- lib/DxilValidation/DxilValidation.cpp | 16 +++-- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- tools/clang/lib/AST/ASTContextHLSL.cpp | 6 +- tools/clang/lib/Sema/SemaHLSL.cpp | 22 ++++-- 
.../rdat_mintarget/sm69_barriers.hlsl | 53 +++++++++++++++ .../ser_reorder_scope_sm69_passing.ll | 68 +++++++++++++++++++ .../reorder_scope_sm68_unavailable.hlsl | 8 +++ .../barrier/reorder_scope_sm69_passing.hlsl | 12 ++++ 11 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 4f8c521851..2c1d309650 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1905,7 +1905,9 @@ enum class BarrierSemanticFlag : uint32_t { GroupSync = 0x00000001, // GROUP_SYNC GroupScope = 0x00000002, // GROUP_SCOPE DeviceScope = 0x00000004, // DEVICE_SCOPE - ValidMask = 0x00000007, + LegacyFlags = 0x00000007, + ReorderScope = 0x00000008, // REORDER_SCOPE + ValidMask = 0x0000000F, GroupFlags = GroupSync | GroupScope, }; diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 05021ce789..c8b6762b3f 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -151,6 +151,7 @@ class OP { static bool IsDxilOpBarrier(OpCode C); static bool BarrierRequiresGroup(const llvm::CallInst *CI); static bool BarrierRequiresNode(const llvm::CallInst *CI); + static bool BarrierRequiresReorder(const llvm::CallInst *CI); static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI); static void GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, unsigned &major, unsigned &minor, diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 7047d9fe59..786d4a5ef6 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -10,6 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilOperations.h" +#include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilInstructions.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/Support/Global.h" @@ -3024,6 +3025,30 @@ bool OP::BarrierRequiresNode(const llvm::CallInst *CI) { } } +bool OP::BarrierRequiresReorder(const llvm::CallInst *CI) { + OpCode Opcode = OP::GetDxilOpFuncCallInst(CI); + switch (Opcode) { + case OpCode::BarrierByMemoryType: { + DxilInst_BarrierByMemoryType Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + case OpCode::BarrierByMemoryHandle: { + DxilInst_BarrierByMemoryHandle Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + default: + return false; + } +} + DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { OpCode opcode = OP::GetDxilOpFuncCallInst(CI); switch (opcode) { @@ -3046,6 +3071,12 @@ DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { semanticFlags = barrier.get_SemanticFlags_val(); } + // Disallow SM6.9+ semantic flags. 
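+  // TranslateToBarrierMode maps the barrier ops back onto the legacy
+  // BarrierMode encoding, which only covers the LegacyFlags set (GROUP_SYNC,
+  // GROUP_SCOPE, DEVICE_SCOPE). REORDER_SCOPE and any other SM6.9+ semantic
+  // flag has no legacy equivalent, so the translation is rejected as Invalid
+  // rather than silently dropping the flag.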
+ if (semanticFlags & + ~static_cast(DXIL::BarrierSemanticFlag::LegacyFlags)) { + return DXIL::BarrierMode::Invalid; + } + // Mask to legacy flags, if allowed. memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed( memoryTypeFlags, (unsigned)DXIL::MemoryTypeFlag::LegacyFlags); @@ -3467,10 +3498,17 @@ void OP::GetMinShaderModelAndMask(const llvm::CallInst *CI, minor = 8; } } + if (BarrierRequiresReorder(CI)) { + major = 6; + minor = 9; + mask &= SFLAG(Library) | SFLAG(RayGeneration); + return; + } if (BarrierRequiresNode(CI)) { mask &= SFLAG(Library) | SFLAG(Node); return; - } else if (BarrierRequiresGroup(CI)) { + } + if (BarrierRequiresGroup(CI)) { mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Node); return; diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index a788f21d4e..aa7bb398fa 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1628,6 +1628,15 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } } +static unsigned getSemanticFlagValidMask(const ShaderModel *pSM) { + unsigned DxilMajor, DxilMinor; + pSM->GetDxilVersion(DxilMajor, DxilMinor); + // DXIL version >= 1.9 + if (hlsl::DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 9) < 0) + return static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + return static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); +} + static void ValidateDxilOperationCallInProfile(CallInst *CI, DXIL::OpCode Opcode, const ShaderModel *pSM, @@ -1838,8 +1847,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, (unsigned)hlsl::DXIL::MemoryTypeFlag::ValidMask, "memory type", "BarrierByMemoryType"); ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", "BarrierByMemoryType"); + getSemanticFlagValidMask(pSM), "semantic", + "BarrierByMemoryType"); if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); @@ -1855,8 +1864,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", OpName); + getSemanticFlagValidMask(pSM), "semantic", OpName); if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 21a1b707c6..6254e5fc71 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7986,7 +7986,7 @@ def err_hlsl_barrier_invalid_memory_flags: Error< "UAV_MEMORY, GROUP_SHARED_MEMORY, NODE_INPUT_MEMORY, NODE_OUTPUT_MEMORY flags">; def err_hlsl_barrier_invalid_semantic_flags: Error< "invalid SemanticFlags for Barrier operation; expected 0 or some combination of " - "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags">; + "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE%select{|, REORDER_SCOPE}0 flags">; def warn_hlsl_barrier_group_memory_requires_group: Warning< "GROUP_SHARED_MEMORY specified for Barrier operation when context has no visible group">, InGroup, DefaultError; diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp 
index c7a031a219..2c3c20546f 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -718,6 +718,8 @@ void hlsl::AddSamplerFeedbackConstants(ASTContext &context) { /// Adds all enums for Barrier intrinsic void hlsl::AddBarrierConstants(ASTContext &context) { + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "MEMORY_TYPE_FLAG", {{"UAV_MEMORY", (unsigned)DXIL::MemoryTypeFlag::UavMemory}, @@ -730,7 +732,9 @@ void hlsl::AddBarrierConstants(ASTContext &context) { context, "BARRIER_SEMANTIC_FLAG", {{"GROUP_SYNC", (unsigned)DXIL::BarrierSemanticFlag::GroupSync}, {"GROUP_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::GroupScope}, - {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}}); + {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}, + {"REORDER_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::ReorderScope, + ConstructAvailabilityAttribute(context, VT69)}}); } static Expr *IntConstantAsBoolExpr(clang::Sema &sema, uint64_t value) { diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 2bd4462f2f..5236a1e3c4 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -11576,7 +11576,8 @@ static bool CheckFinishedCrossGroupSharingCall(Sema &S, CXXMethodDecl *MD, return false; } -static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { +static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { DXASSERT(FD->getNumParams() == 2, "otherwise, unknown Barrier overload"); // Emit error when MemoryTypeFlags are known to be invalid. @@ -11606,12 +11607,18 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { llvm::APSInt SemanticFlagsVal; if (SemanticFlagsExpr->isIntegerConstantExpr(SemanticFlagsVal, S.Context)) { SemanticFlags = SemanticFlagsVal.getLimitedValue(); - if ((uint32_t)SemanticFlags & - ~(uint32_t)DXIL::BarrierSemanticFlag::ValidMask) { + uint32_t ValidMask = 0U; + if (SM->IsSM69Plus()) { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); + } else { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + } + if ((uint32_t)SemanticFlags & ~ValidMask) { S.Diags.Report(SemanticFlagsExpr->getExprLoc(), diag::err_hlsl_barrier_invalid_semantic_flags) - << (uint32_t)SemanticFlags - << (uint32_t)DXIL::BarrierSemanticFlag::ValidMask; + << SM->IsSM69Plus(); return true; } } @@ -11654,6 +11661,9 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, if (!IsBuiltinTable(IntrinsicAttr->getGroup())) return; + const auto *SM = + hlsl::ShaderModel::GetByName(getLangOpts().HLSLProfile.c_str()); + hlsl::IntrinsicOp opCode = (hlsl::IntrinsicOp)IntrinsicAttr->getOpcode(); switch (opCode) { case hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing: @@ -11661,7 +11671,7 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, TheCall->getLocStart()); break; case hlsl::IntrinsicOp::IOP_Barrier: - CheckBarrierCall(*this, FDecl, TheCall); + CheckBarrierCall(*this, FDecl, TheCall, SM); break; #ifdef ENABLE_SPIRV_CODEGEN case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl new file mode 100644 index 0000000000..6cedf44e20 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl @@ -0,0 +1,53 @@ +// 
RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | %D3DReflect %s | %FileCheck %s -check-prefixes=RDAT + +// Check that stage flags are set correctly still for different barrier modes in SM 6.9. + +// RDAT: FunctionTable[{{.*}}] = { + +RWByteAddressBuffer BAB : register(u1, space0); + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder() { + Barrier(UAV_MEMORY, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder2() { + Barrier(BAB, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call() { + fn_barrier_reorder(); + BAB.Store(0, 0); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call2() { + fn_barrier_reorder2(); + BAB.Store(0, 0); +} diff --git a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll new file mode 100644 index 0000000000..cab9942b02 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll @@ -0,0 +1,68 @@ +; RUN: %dxilver 1.9 | %dxv %s + +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; BAB UAV byte r/w U0 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?BAB@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A", align 4 + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) ; BarrierByMemoryType(MemoryTypeFlags,SemanticFlags) + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8) ; BarrierByMemoryHandle(object,SemanticFlags) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32) #1 + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryHandle(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle 
@dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #3 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!5} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"BAB", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!5 = !{i32 1, void ()* @"\01?main@@YAXXZ", !6} +!6 = !{!7} +!7 = !{i32 1, !8, !8} +!8 = !{} +!9 = !{null, !"", null, !2, !10} +!10 = !{i32 0, i64 8589934608} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl new file mode 100644 index 0000000000..fc42f99a9a --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -Tlib_6_8 -verify %s + +[Shader("compute")] +[numthreads(1, 1, 1)] +void main() { + // expected-error@+1{{invalid SemanticFlags for Barrier operation; expected 0 or some combination of GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags}} + Barrier(0, REORDER_SCOPE); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl new file mode 100644 index 0000000000..18271a2b11 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s + +RWByteAddressBuffer BAB : register(u1, space0); + +[shader("raygeneration")] +void main() { +// CHECK: call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) + Barrier(UAV_MEMORY, REORDER_SCOPE); + +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %{{[^ ]+}}, i32 8) + Barrier(BAB, REORDER_SCOPE); +} From 0168df12c28e1f088fd713d550a82cb35a34f89c Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Sat, 12 Apr 2025 03:07:08 +0200 Subject: [PATCH 78/88] [SER] HitObject_FromRayQuery[WithAttrs] DXIL opcodes and check-pass tests (#7277) Add the DXIL operations and a passing validation test for: - HitObject_FromRayQuery - HitObject_FromRayQueryWithAttrs DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 11 ++- include/dxc/DXIL/DxilInstructions.h | 63 ++++++++++++++ lib/DXIL/DxilOperations.cpp | 56 +++++++------ .../ser_hitobject_fromrayquery_passing.ll | 84 +++++++++++++++++++ utils/hct/hctdb.py | 46 +++++++++- 5 files changed, 230 insertions(+), 30 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 2c1d309650..9c71eb329e 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -503,7 +503,6 @@ enum class OpCode : unsigned { ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved ReservedB0 = 262, // reserved - ReservedB1 = 263, // reserved ReservedB10 = 272, // reserved ReservedB11 = 273, // reserved 
ReservedB12 = 274, // reserved @@ -514,7 +513,6 @@ enum class OpCode : unsigned { ReservedB17 = 279, // reserved ReservedB18 = 280, // reserved ReservedB19 = 281, // reserved - ReservedB2 = 264, // reserved ReservedB20 = 282, // reserved ReservedB21 = 283, // reserved ReservedB22 = 284, // reserved @@ -916,6 +914,11 @@ enum class OpCode : unsigned { // operation with a mipmap-level offset // Shader Execution Reordering + HitObject_FromRayQuery = 263, // Creates a new HitObject representing a + // committed hit from a RayQuery + HitObject_FromRayQueryWithAttrs = + 264, // Creates a new HitObject representing a committed hit from a + // RayQuery and committed attributes HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss HitObject_MakeNop = 266, // Creates an empty nop HitObject @@ -1294,6 +1297,8 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackLevel, // Shader Execution Reordering + HitObject_FromRayQuery, + HitObject_FromRayQueryWithAttrs, HitObject_MakeMiss, HitObject_MakeNop, @@ -1361,7 +1366,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 179 // exclusive last value of enumeration + NumOpClasses = 181 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 6ee22869a5..15f7a1362b 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8850,6 +8850,69 @@ struct DxilInst_AllocateRayQuery2 { } }; +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery +struct DxilInst_HitObject_FromRayQuery { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_FromRayQuery(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQuery); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery and committed attributes +struct DxilInst_HitObject_FromRayQueryWithAttrs { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_FromRayQueryWithAttrs(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQueryWithAttrs); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + arg_HitKind = 2, + arg_CommittedAttribs = 3, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_HitKind() const { return Instr->getOperand(2); } + void 
set_HitKind(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_CommittedAttribs() const { return Instr->getOperand(3); } + void set_CommittedAttribs(llvm::Value *val) { Instr->setOperand(3, val); } +}; + /// This instruction Creates a new HitObject representing a miss struct DxilInst_HitObject_MakeMiss { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 786d4a5ef6..7945197eba 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2311,24 +2311,24 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - {OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - Attribute::None, + + // Shader Execution Reordering + {OC::HitObject_FromRayQuery, + "HitObject_FromRayQuery", + OCC::HitObject_FromRayQuery, + "hitObject_FromRayQuery", + Attribute::ReadOnly, 0, {}, {}}, // Overloads: v - - // Shader Execution Reordering + {OC::HitObject_FromRayQueryWithAttrs, + "HitObject_FromRayQueryWithAttrs", + OCC::HitObject_FromRayQueryWithAttrs, + "hitObject_FromRayQueryWithAttrs", + Attribute::ReadOnly, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u {OC::HitObject_MakeMiss, "HitObject_MakeMiss", OCC::HitObject_MakeMiss, @@ -3446,8 +3446,10 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } - // Instructions: HitObject_MakeMiss=265, HitObject_MakeNop=266 - if ((265 <= op && op <= 266)) { + // Instructions: HitObject_FromRayQuery=263, + // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, + // HitObject_MakeNop=266 + if ((263 <= op && op <= 266)) { major = 6; minor = 9; mask = @@ -5622,16 +5624,20 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB1: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_FromRayQuery: + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB2: - A(pV); + case OpCode::HitObject_FromRayQueryWithAttrs: + A(pHit); + A(pI32); + A(pI32); A(pI32); + A(udt); break; - - // Shader Execution Reordering case OpCode::HitObject_MakeMiss: A(pHit); A(pI32); @@ -5997,6 +6003,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return nullptr; return FT->getParamType(15); case OpCode::ReportHit: + case OpCode::HitObject_FromRayQueryWithAttrs: if (FT->getNumParams() <= 3) return nullptr; return FT->getParamType(3); @@ -6080,8 +6087,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ReservedA1: case OpCode::ReservedA2: case OpCode::ReservedB0: - case OpCode::ReservedB1: - case OpCode::ReservedB2: + case OpCode::HitObject_FromRayQuery: case OpCode::HitObject_MakeMiss: case OpCode::HitObject_MakeNop: case OpCode::ReservedB5: diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll new file mode 100644 index 0000000000..5b0c65fd6b --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll @@ -0,0 +1,84 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. 
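+; The module below allocates a RayQuery (dx.op.allocateRayQuery), traces a ray
+; inline (dx.op.rayQuery_TraceRayInline), and then constructs HitObjects from
+; the committed hit with dx.op.hitObject_FromRayQuery (opcode 263) and
+; dx.op.hitObject_FromRayQueryWithAttrs (opcode 264), the latter overloaded on
+; the %struct.CustomAttrs attribute type and passing HitKind 16.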
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%struct.CustomAttrs = type { float, float } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.CustomAttrs, align 4 + %3 = call i32 @dx.op.allocateRayQuery(i32 178, i32 5) ; AllocateRayQuery(constRayFlags) + %4 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %5 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %3, %dx.types.Handle %5, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; RayQuery_TraceRayInline(rayQueryHandle,accelerationStructure,rayFlags,instanceInclusionMask,origin_X,origin_Y,origin_Z,tMin,direction_X,direction_Y,direction_Z,tMax) + %6 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %3) ; HitObject_FromRayQuery(rayQueryHandle) + %7 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %3, i32 16, %struct.CustomAttrs* nonnull %2) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.allocateRayQuery(i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.rayQuery_TraceRayInline(i32, i32, %dx.types.Handle, i32, i32, float, float, float, float, float, float, float, float) #0 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32, i32, i32, %struct.CustomAttrs*) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!6} +!dx.dxrPayloadAnnotations = !{!10} +!dx.entryPoints = !{!13, !15} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, null, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{i32 1, void 
()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, %struct.Payload undef, !11} +!11 = !{!12} +!12 = !{i32 0, i32 8210} +!13 = !{null, !"", null, !2, !14} +!14 = !{i32 0, i64 33554432} +!15 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !16} +!16 = !{i32 8, i32 7, i32 5, !17} +!17 = !{i32 0} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index a6cc52df1a..b3b9c82528 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -848,7 +848,10 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Extended Command Information" self.name_idx[i].shader_stages = ("vertex",) self.name_idx[i].shader_model = 6, 8 - for i in ("HitObject_MakeMiss,HitObject_MakeNop").split(","): + for i in ( + "HitObject_MakeMiss,HitObject_MakeNop" + + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + ).split(","): self.name_idx[i].category = "Shader Execution Reordering" self.name_idx[i].shader_model = 6, 9 self.name_idx[i].shader_stages = ( @@ -5739,7 +5742,46 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3) + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1) + + self.add_dxil_op( + "HitObject_FromRayQuery", + next_op_idx, + "HitObject_FromRayQuery", + "Creates a new HitObject representing a committed hit from a RayQuery", + "v", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_FromRayQueryWithAttrs", + next_op_idx, + "HitObject_FromRayQueryWithAttrs", + "Creates a new HitObject representing a committed hit from a RayQuery and committed attributes", + "u", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + db_dxil_param( + 3, + "i32", + "HitKind", + "User-specified value in range of 0-127 to identify the type of hit", + ), + db_dxil_param(4, "udt", "CommittedAttribs", "Committed attributes"), + ], + ) + next_op_idx += 1 self.add_dxil_op( "HitObject_MakeMiss", From 94f9275debff15d3e57d83c60bae16055c2d60c6 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Sun, 13 Apr 2025 02:08:54 +0200 Subject: [PATCH 79/88] [SER] HitObject accessors DXIL opcodes and check-pass tests (#7276) Add the DXIL operations and a passing validation test for: - HitObject_IsMiss, HitObject_IsHit, HitObject_IsNop - HitObject_RayFlags, HitObject_RayTMin, HitObject_RayTCurrent - HitObject_GeometryIndex, HitObject_InstanceIndex, HitObject_InstanceID - HitObject_PrimitiveIndex, HitObject_HitKind, HitObject_ShaderTableIndex - HitObject_WorldRayOrigin, HitObject_WorldRayDirection, - HitObject_ObjectRayOrigin, HitObject_ObjectRayDirection - HitObject_ObjectToWorld3x4, HitObject_WorldToObject3x4 - HitObject_SetShaderTableIndex, HitObject_LoadLocalRootTableConstant - HitObject_Attributes Closes #7310 DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 58 +- include/dxc/DXIL/DxilInstructions.h | 685 ++++++++++++++++++ lib/DXIL/DxilOperations.cpp | 500 +++++++------ .../ser_hitobject_accessors_passing.ll | 110 +++ utils/hct/hctdb.py | 342 ++++++++- 5 files changed, 1446 insertions(+), 249 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll diff 
--git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 9c71eb329e..723abe552f 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -503,32 +503,11 @@ enum class OpCode : unsigned { ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved ReservedB0 = 262, // reserved - ReservedB10 = 272, // reserved - ReservedB11 = 273, // reserved - ReservedB12 = 274, // reserved - ReservedB13 = 275, // reserved - ReservedB14 = 276, // reserved - ReservedB15 = 277, // reserved - ReservedB16 = 278, // reserved - ReservedB17 = 279, // reserved - ReservedB18 = 280, // reserved - ReservedB19 = 281, // reserved - ReservedB20 = 282, // reserved - ReservedB21 = 283, // reserved - ReservedB22 = 284, // reserved - ReservedB23 = 285, // reserved - ReservedB24 = 286, // reserved - ReservedB25 = 287, // reserved - ReservedB26 = 288, // reserved - ReservedB27 = 289, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved ReservedB30 = 292, // reserved ReservedB5 = 267, // reserved ReservedB6 = 268, // reserved - ReservedB7 = 269, // reserved - ReservedB8 = 270, // reserved - ReservedB9 = 271, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved ReservedC2 = 295, // reserved @@ -914,13 +893,42 @@ enum class OpCode : unsigned { // operation with a mipmap-level offset // Shader Execution Reordering + HitObject_Attributes = 289, // Returns the attributes set for this HitObject HitObject_FromRayQuery = 263, // Creates a new HitObject representing a // committed hit from a RayQuery HitObject_FromRayQueryWithAttrs = 264, // Creates a new HitObject representing a committed hit from a // RayQuery and committed attributes + HitObject_GeometryIndex = 281, // Returns the geometry index committed on hit + HitObject_HitKind = 285, // Returns the HitKind of the hit + HitObject_InstanceID = 283, // Returns the instance id committed on hit + HitObject_InstanceIndex = 282, // Returns the instance index committed on hit + HitObject_IsHit = 270, // Returns `true` if the HitObject is a NOP-HitObject + HitObject_IsMiss = 269, // Returns `true` if the HitObject represents a miss + HitObject_IsNop = 271, // Returns `true` if the HitObject represents a nop + HitObject_LoadLocalRootTableConstant = + 288, // Returns the root table constant for this HitObject and offset HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss HitObject_MakeNop = 266, // Creates an empty nop HitObject + HitObject_ObjectRayDirection = + 278, // Returns the ray direction in object space + HitObject_ObjectRayOrigin = 277, // Returns the ray origin in object space + HitObject_ObjectToWorld3x4 = 279, // Returns the object to world space + // transformation matrix in 3x4 form + HitObject_PrimitiveIndex = + 284, // Returns the primitive index committed on hit + HitObject_RayFlags = 272, // Returns the ray flags set in the HitObject + HitObject_RayTCurrent = + 274, // Returns the current T value set in the HitObject + HitObject_RayTMin = 273, // Returns the TMin value set in the HitObject + HitObject_SetShaderTableIndex = + 287, // Returns a HitObject with updated shader table index + HitObject_ShaderTableIndex = + 286, // Returns the shader table index set for this HitObject + HitObject_WorldRayDirection = 276, // Returns the ray direction in world space + HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space + HitObject_WorldToObject3x4 = 280, // Returns the world to object space + // transformation matrix in 3x4 form // 
Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands @@ -1297,10 +1305,16 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackLevel, // Shader Execution Reordering + HitObject_Attributes, HitObject_FromRayQuery, HitObject_FromRayQueryWithAttrs, + HitObject_LoadLocalRootTableConstant, HitObject_MakeMiss, HitObject_MakeNop, + HitObject_SetShaderTableIndex, + HitObject_StateMatrix, + HitObject_StateScalar, + HitObject_StateVector, // Synchronization AtomicBinOp, @@ -1366,7 +1380,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 181 // exclusive last value of enumeration + NumOpClasses = 187 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 15f7a1362b..2655124c2d 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8987,6 +8987,691 @@ struct DxilInst_HitObject_MakeNop { bool requiresUniformInputs() const { return false; } }; +/// This instruction Returns `true` if the HitObject represents a miss +struct DxilInst_HitObject_IsMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject is a NOP-HitObject +struct DxilInst_HitObject_IsHit { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsHit(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsHit); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject represents a nop +struct DxilInst_HitObject_IsNop { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return 
Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray flags set in the HitObject +struct DxilInst_HitObject_RayFlags { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayFlags(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayFlags); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the TMin value set in the HitObject +struct DxilInst_HitObject_RayTMin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTMin(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayTMin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the current T value set in the HitObject +struct DxilInst_HitObject_RayTCurrent { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTCurrent(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_RayTCurrent); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray origin in world space +struct DxilInst_HitObject_WorldRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + 
llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in world space +struct DxilInst_HitObject_WorldRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayDirection); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray origin in object space +struct DxilInst_HitObject_ObjectRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in object space +struct DxilInst_HitObject_ObjectRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayDirection); + } + // Validation support 
+ bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the object to world space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_ObjectToWorld3x4 { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectToWorld3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectToWorld3x4); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the world to object space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_WorldToObject3x4 { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldToObject3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldToObject3x4); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return 
Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the geometry index committed on hit +struct DxilInst_HitObject_GeometryIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_GeometryIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_GeometryIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance index committed on hit +struct DxilInst_HitObject_InstanceIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance id committed on hit +struct DxilInst_HitObject_InstanceID { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceID); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This 
instruction Returns the primitive index committed on hit +struct DxilInst_HitObject_PrimitiveIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_PrimitiveIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_PrimitiveIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the HitKind of the hit +struct DxilInst_HitObject_HitKind { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_HitKind(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_HitKind); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the shader table index set for this HitObject +struct DxilInst_HitObject_ShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns a HitObject with updated shader table index +struct DxilInst_HitObject_SetShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_SetShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_SetShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_shaderTableIndex = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_shaderTableIndex() const 
{ return Instr->getOperand(2); } + void set_shaderTableIndex(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the root table constant for this HitObject and +/// offset +struct DxilInst_HitObject_LoadLocalRootTableConstant { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_LoadLocalRootTableConstant(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_LoadLocalRootTableConstant); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_offset = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_offset() const { return Instr->getOperand(2); } + void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the attributes set for this HitObject +struct DxilInst_HitObject_Attributes { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Attributes(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_Attributes); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_attributes = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_attributes() const { return Instr->getOperand(2); } + void set_attributes(llvm::Value *val) { Instr->setOperand(2, val); } +}; + /// This instruction reads from a raw buffer and structured buffer struct DxilInst_RawBufferVectorLoad { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 7945197eba..d9276fc7d6 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2362,174 +2362,177 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - {OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB13, - "ReservedB13", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB14, - "ReservedB14", - 
OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB16, - "ReservedB16", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v - {OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - Attribute::None, + + // Shader Execution Reordering + {OC::HitObject_IsMiss, + "HitObject_IsMiss", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::HitObject_IsHit, + "HitObject_IsHit", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::HitObject_IsNop, + "HitObject_IsNop", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x8}}, + {{0x0}}}, // Overloads: 1 + {OC::HitObject_RayFlags, + "HitObject_RayFlags", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_RayTMin, + "HitObject_RayTMin", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_RayTCurrent, + "HitObject_RayTCurrent", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_WorldRayOrigin, + "HitObject_WorldRayOrigin", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_WorldRayDirection, + "HitObject_WorldRayDirection", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_ObjectRayOrigin, + "HitObject_ObjectRayOrigin", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_ObjectRayDirection, + "HitObject_ObjectRayDirection", + OCC::HitObject_StateVector, + "hitObject_StateVector", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_ObjectToWorld3x4, + "HitObject_ObjectToWorld3x4", + OCC::HitObject_StateMatrix, + "hitObject_StateMatrix", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + 
{OC::HitObject_WorldToObject3x4, + "HitObject_WorldToObject3x4", + OCC::HitObject_StateMatrix, + "hitObject_StateMatrix", + Attribute::ReadNone, + 1, + {{0x2}}, + {{0x0}}}, // Overloads: f + {OC::HitObject_GeometryIndex, + "HitObject_GeometryIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_InstanceIndex, + "HitObject_InstanceIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_InstanceID, + "HitObject_InstanceID", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_PrimitiveIndex, + "HitObject_PrimitiveIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_HitKind, + "HitObject_HitKind", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_ShaderTableIndex, + "HitObject_ShaderTableIndex", + OCC::HitObject_StateScalar, + "hitObject_StateScalar", + Attribute::ReadNone, + 1, + {{0x40}}, + {{0x0}}}, // Overloads: i + {OC::HitObject_SetShaderTableIndex, + "HitObject_SetShaderTableIndex", + OCC::HitObject_SetShaderTableIndex, + "hitObject_SetShaderTableIndex", + Attribute::ReadNone, 0, {}, {}}, // Overloads: v - {OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - Attribute::None, + {OC::HitObject_LoadLocalRootTableConstant, + "HitObject_LoadLocalRootTableConstant", + OCC::HitObject_LoadLocalRootTableConstant, + "hitObject_LoadLocalRootTableConstant", + Attribute::ReadOnly, 0, {}, {}}, // Overloads: v + {OC::HitObject_Attributes, + "HitObject_Attributes", + OCC::HitObject_Attributes, + "hitObject_Attributes", + Attribute::ArgMemOnly, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::ReservedB28, "ReservedB28", OCC::Reserved, @@ -3448,8 +3451,17 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, } // Instructions: HitObject_FromRayQuery=263, // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, - // HitObject_MakeNop=266 - if ((263 <= op && op <= 266)) { + // HitObject_MakeNop=266, HitObject_IsMiss=269, HitObject_IsHit=270, + // HitObject_IsNop=271, HitObject_RayFlags=272, HitObject_RayTMin=273, + // HitObject_RayTCurrent=274, HitObject_WorldRayOrigin=275, + // HitObject_WorldRayDirection=276, HitObject_ObjectRayOrigin=277, + // HitObject_ObjectRayDirection=278, HitObject_ObjectToWorld3x4=279, + // HitObject_WorldToObject3x4=280, HitObject_GeometryIndex=281, + // HitObject_InstanceIndex=282, HitObject_InstanceID=283, + // HitObject_PrimitiveIndex=284, HitObject_HitKind=285, + // HitObject_ShaderTableIndex=286, HitObject_SetShaderTableIndex=287, + // HitObject_LoadLocalRootTableConstant=288, HitObject_Attributes=289 + if ((263 <= op && op <= 266) || (269 <= op && op <= 289)) { major = 6; minor = 9; mask = @@ -5666,90 +5678,126 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB7: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_IsMiss: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB8: - A(pV); + case OpCode::HitObject_IsHit: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB9: - A(pV); + case OpCode::HitObject_IsNop: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB10: - 
A(pV); + case OpCode::HitObject_RayFlags: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB11: - A(pV); + case OpCode::HitObject_RayTMin: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB12: - A(pV); + case OpCode::HitObject_RayTCurrent: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB13: - A(pV); + case OpCode::HitObject_WorldRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB14: - A(pV); + case OpCode::HitObject_WorldRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB15: - A(pV); + case OpCode::HitObject_ObjectRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB16: - A(pV); + case OpCode::HitObject_ObjectRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB17: - A(pV); + case OpCode::HitObject_ObjectToWorld3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB18: - A(pV); + case OpCode::HitObject_WorldToObject3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB19: - A(pV); + case OpCode::HitObject_GeometryIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB20: - A(pV); + case OpCode::HitObject_InstanceIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB21: - A(pV); + case OpCode::HitObject_InstanceID: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB22: - A(pV); + case OpCode::HitObject_PrimitiveIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB23: - A(pV); + case OpCode::HitObject_HitKind: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB24: - A(pV); + case OpCode::HitObject_ShaderTableIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB25: - A(pV); + case OpCode::HitObject_SetShaderTableIndex: + A(pHit); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB26: - A(pV); + case OpCode::HitObject_LoadLocalRootTableConstant: + A(pI32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB27: + case OpCode::HitObject_Attributes: A(pV); A(pI32); + A(pHit); + A(udt); break; + + // case OpCode::ReservedB28: A(pV); A(pI32); @@ -5959,6 +6007,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TempRegStore: case OpCode::CallShader: case OpCode::Pack4x8: + case OpCode::HitObject_Attributes: if (FT->getNumParams() <= 2) return nullptr; return FT->getParamType(2); @@ -6092,27 +6141,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::HitObject_MakeNop: case OpCode::ReservedB5: case OpCode::ReservedB6: - case OpCode::ReservedB7: - case OpCode::ReservedB8: - case OpCode::ReservedB9: - case OpCode::ReservedB10: - case OpCode::ReservedB11: - case OpCode::ReservedB12: - case OpCode::ReservedB13: - case OpCode::ReservedB14: - case OpCode::ReservedB15: - case OpCode::ReservedB16: - case OpCode::ReservedB17: - case OpCode::ReservedB18: - case OpCode::ReservedB19: - case OpCode::ReservedB20: - case OpCode::ReservedB21: - case OpCode::ReservedB22: - case OpCode::ReservedB23: - case OpCode::ReservedB24: - case OpCode::ReservedB25: - case OpCode::ReservedB26: - case OpCode::ReservedB27: + case OpCode::HitObject_SetShaderTableIndex: + case OpCode::HitObject_LoadLocalRootTableConstant: case OpCode::ReservedB28: case OpCode::ReservedB29: case OpCode::ReservedB30: @@ -6164,6 +6194,13 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case 
OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex: case OpCode::StartVertexLocation: case OpCode::StartInstanceLocation: + case OpCode::HitObject_RayFlags: + case OpCode::HitObject_GeometryIndex: + case OpCode::HitObject_InstanceIndex: + case OpCode::HitObject_InstanceID: + case OpCode::HitObject_PrimitiveIndex: + case OpCode::HitObject_HitKind: + case OpCode::HitObject_ShaderTableIndex: return IntegerType::get(Ctx, 32); case OpCode::CalculateLOD: case OpCode::DomainLocation: @@ -6190,6 +6227,14 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CandidateObjectRayDirection: case OpCode::RayQuery_CommittedObjectRayOrigin: case OpCode::RayQuery_CommittedObjectRayDirection: + case OpCode::HitObject_RayTMin: + case OpCode::HitObject_RayTCurrent: + case OpCode::HitObject_WorldRayOrigin: + case OpCode::HitObject_WorldRayDirection: + case OpCode::HitObject_ObjectRayOrigin: + case OpCode::HitObject_ObjectRayDirection: + case OpCode::HitObject_ObjectToWorld3x4: + case OpCode::HitObject_WorldToObject3x4: return Type::getFloatTy(Ctx); case OpCode::MakeDouble: case OpCode::SplitDouble: @@ -6200,6 +6245,9 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CommittedTriangleFrontFace: case OpCode::IsHelperLane: case OpCode::QuadVote: + case OpCode::HitObject_IsMiss: + case OpCode::HitObject_IsHit: + case OpCode::HitObject_IsNop: return IntegerType::get(Ctx, 1); case OpCode::CBufferLoadLegacy: case OpCode::Sample: diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll new file mode 100644 index 0000000000..e527125009 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll @@ -0,0 +1,110 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. 
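+; The state accessors exercised below share three overloaded call shapes
+; (the overload suffix .i1/.i32/.f32 names the scalar return type):
+;   scalar: call <ty>  @dx.op.hitObject_StateScalar.<ty>(i32 opcode, %dx.types.HitObject hit)
+;   vector: call float @dx.op.hitObject_StateVector.f32(i32 opcode, %dx.types.HitObject hit, i32 component)
+;   matrix: call float @dx.op.hitObject_StateMatrix.f32(i32 opcode, %dx.types.HitObject hit, i32 row, i32 col)
+; HitObject_SetShaderTableIndex, HitObject_LoadLocalRootTableConstant, and
+; HitObject_Attributes use the dedicated signatures declared at the end of
+; this file.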
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.AttribType = type { float, float } +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %attrs = alloca %struct.AttribType, align 4 + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + %r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %nop) ; HitObject_IsMiss(hitObject) + + %r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %nop) ; HitObject_IsHit(hitObject) + + %r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %nop) ; HitObject_IsNop(hitObject) + + %r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject %nop) ; HitObject_RayFlags(hitObject) + + %r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject %nop) ; HitObject_RayTMin(hitObject) + + %r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject %nop) ; HitObject_RayTCurrent(hitObject) + + %r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayOrigin(hitObject,component) + + %r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayDirection(hitObject,component) + + %r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) + + %r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayDirection(hitObject,component) + + %r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + + %r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + + %r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %nop) ; HitObject_GeometryIndex(hitObject) + + %r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %nop) ; HitObject_InstanceIndex(hitObject) + + %r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %nop) ; HitObject_InstanceID(hitObject) + + %r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %nop) ; HitObject_PrimitiveIndex(hitObject) + + %r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %nop) ; HitObject_HitKind(hitObject) + + %r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %nop) ; HitObject_ShaderTableIndex(hitObject) + + %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %nop, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + + call void @dx.op.hitObject_Attributes.struct.AttribType(i32 289, %dx.types.HitObject %nop, %struct.AttribType* nonnull %attrs) ; HitObject_Attributes(hitObject,attributes) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32, %dx.types.HitObject, 
i32) #1 + +; Function Attrs: nounwind readnone +declare i1 @dx.op.hitObject_StateScalar.i1(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare i32 @dx.op.hitObject_StateScalar.i32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readonly +declare i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32, %dx.types.HitObject, i32) #2 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateVector.f32(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind argmemonly +declare void @dx.op.hitObject_Attributes.struct.AttribType(i32, %dx.types.HitObject, %struct.AttribType*) #3 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateScalar.f32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateMatrix.f32(i32, %dx.types.HitObject, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind argmemonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!3, !4} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !5} +!3 = !{null, !"", null, null, !6} +!4 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !7} +!5 = !{!8} +!6 = !{i32 0, i64 0} +!7 = !{i32 8, i32 7, i32 5, !9} +!8 = !{i32 1, !10, !10} +!9 = !{i32 0} +!10 = !{} + diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index b3b9c82528..28695a4036 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -851,6 +851,11 @@ def populate_categories_and_models(self): for i in ( "HitObject_MakeMiss,HitObject_MakeNop" + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + + ",HitObject_IsMiss,HitObject_IsHit,HitObject_IsNop" + + ",HitObject_RayFlags,HitObject_RayTMin,HitObject_RayTCurrent,HitObject_GeometryIndex,HitObject_InstanceIndex,HitObject_InstanceID,HitObject_PrimitiveIndex,HitObject_HitKind,HitObject_ShaderTableIndex" + + ",HitObject_WorldRayOrigin,HitObject_WorldRayDirection,HitObject_ObjectRayOrigin,HitObject_ObjectRayDirection" + + ",HitObject_ObjectToWorld3x4,HitObject_WorldToObject3x4" + + ",HitObject_SetShaderTableIndex,HitObject_LoadLocalRootTableConstant,HitObject_Attributes" ).split(","): self.name_idx[i].category = "Shader Execution Reordering" self.name_idx[i].shader_model = 6, 9 @@ -5817,7 +5822,342 @@ def UFI(name, **mappings): ) next_op_idx += 1 - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 26, 5) + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 2, 5) + + self.add_dxil_op( + "HitObject_IsMiss", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a miss", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_IsHit", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject is a NOP-HitObject", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_IsNop", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a nop", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( 
+ "HitObject_RayFlags", + next_op_idx, + "HitObject_StateScalar", + "Returns the ray flags set in the HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTMin", + next_op_idx, + "HitObject_StateScalar", + "Returns the TMin value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTCurrent", + next_op_idx, + "HitObject_StateScalar", + "Returns the current T value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectToWorld3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the object to world space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], , relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldToObject3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the world to object space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_GeometryIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the geometry index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, 
"i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceID", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance id committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_PrimitiveIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the primitive index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_HitKind", + next_op_idx, + "HitObject_StateScalar", + "Returns the HitKind of the hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ShaderTableIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the shader table index set for this HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_SetShaderTableIndex", + next_op_idx, + "HitObject_SetShaderTableIndex", + "Returns a HitObject with updated shader table index", + "v", + "rn", + [ + db_dxil_param( + 0, "hit_object", "hitObject", "hit with shader table index set" + ), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "shaderTableIndex", "shader table index"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_LoadLocalRootTableConstant", + next_op_idx, + "HitObject_LoadLocalRootTableConstant", + "Returns the root table constant for this HitObject and offset", + "v", + "ro", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "offset", "offset"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_Attributes", + next_op_idx, + "HitObject_Attributes", + "Returns the attributes set for this HitObject", + "u", + "amo", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, "udt", "attributes", "pointer to store the attributes to" + ), + ], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3, 28) # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) From 8280d0fb4104ce9c67af3b9f4a1335760c7c5113 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Mon, 14 Apr 2025 18:36:35 +0200 Subject: [PATCH 80/88] [SER] HitObject_Invoke|TraceRay DXIL opcodes and check-pass test (#7278) Add the DXIL operations and a passing validation test for: - HitObject_TraceRay - HitObject_Invoke DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 10 +- include/dxc/DXIL/DxilInstructions.h | 117 ++++++++++++++++++ lib/DXIL/DxilOperations.cpp | 90 ++++++++------ .../DxilShaderAccessTracking.cpp | 7 +- lib/DxilValidation/DxilValidation.cpp | 23 ++-- .../ser_hitobject_traceinvoke_passing.ll | 68 ++++++++++ 
utils/hct/hctdb.py | 86 ++++++++++++- 7 files changed, 349 insertions(+), 52 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 723abe552f..e002779d09 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -502,11 +502,9 @@ enum class OpCode : unsigned { ReservedA0 = 259, // reserved ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved - ReservedB0 = 262, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved ReservedB30 = 292, // reserved - ReservedB5 = 267, // reserved ReservedB6 = 268, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved @@ -903,6 +901,8 @@ enum class OpCode : unsigned { HitObject_HitKind = 285, // Returns the HitKind of the hit HitObject_InstanceID = 283, // Returns the instance id committed on hit HitObject_InstanceIndex = 282, // Returns the instance index committed on hit + HitObject_Invoke = 267, // Represents the invocation of the CH/MS shader + // represented by the HitObject HitObject_IsHit = 270, // Returns `true` if the HitObject is a NOP-HitObject HitObject_IsMiss = 269, // Returns `true` if the HitObject represents a miss HitObject_IsNop = 271, // Returns `true` if the HitObject represents a nop @@ -925,6 +925,8 @@ enum class OpCode : unsigned { 287, // Returns a HitObject with updated shader table index HitObject_ShaderTableIndex = 286, // Returns the shader table index set for this HitObject + HitObject_TraceRay = 262, // Analogous to TraceRay but without invoking CH/MS + // and returns the intermediate state as a HitObject HitObject_WorldRayDirection = 276, // Returns the ray direction in world space HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space HitObject_WorldToObject3x4 = 280, // Returns the world to object space @@ -1308,6 +1310,7 @@ enum class OpCodeClass : unsigned { HitObject_Attributes, HitObject_FromRayQuery, HitObject_FromRayQueryWithAttrs, + HitObject_Invoke, HitObject_LoadLocalRootTableConstant, HitObject_MakeMiss, HitObject_MakeNop, @@ -1315,6 +1318,7 @@ enum class OpCodeClass : unsigned { HitObject_StateMatrix, HitObject_StateScalar, HitObject_StateVector, + HitObject_TraceRay, // Synchronization AtomicBinOp, @@ -1380,7 +1384,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 187 // exclusive last value of enumeration + NumOpClasses = 189 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 2655124c2d..e39f754c68 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -8850,6 +8850,92 @@ struct DxilInst_AllocateRayQuery2 { } }; +/// This instruction Analogous to TraceRay but without invoking CH/MS and +/// returns the intermediate state as a HitObject +struct DxilInst_HitObject_TraceRay { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_TraceRay(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_TraceRay); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (16 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return 
false; } + // Operand indexes + enum OperandIdx { + arg_accelerationStructure = 1, + arg_rayFlags = 2, + arg_instanceInclusionMask = 3, + arg_rayContributionToHitGroupIndex = 4, + arg_multiplierForGeometryContributionToHitGroupIndex = 5, + arg_missShaderIndex = 6, + arg_Origin_X = 7, + arg_Origin_Y = 8, + arg_Origin_Z = 9, + arg_TMin = 10, + arg_Direction_X = 11, + arg_Direction_Y = 12, + arg_Direction_Z = 13, + arg_TMax = 14, + arg_payload = 15, + }; + // Accessors + llvm::Value *get_accelerationStructure() const { + return Instr->getOperand(1); + } + void set_accelerationStructure(llvm::Value *val) { + Instr->setOperand(1, val); + } + llvm::Value *get_rayFlags() const { return Instr->getOperand(2); } + void set_rayFlags(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_instanceInclusionMask() const { + return Instr->getOperand(3); + } + void set_instanceInclusionMask(llvm::Value *val) { + Instr->setOperand(3, val); + } + llvm::Value *get_rayContributionToHitGroupIndex() const { + return Instr->getOperand(4); + } + void set_rayContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(4, val); + } + llvm::Value *get_multiplierForGeometryContributionToHitGroupIndex() const { + return Instr->getOperand(5); + } + void set_multiplierForGeometryContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(5, val); + } + llvm::Value *get_missShaderIndex() const { return Instr->getOperand(6); } + void set_missShaderIndex(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(7); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(8); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(9); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(10); } + void set_TMin(llvm::Value *val) { Instr->setOperand(10, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(11); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(12); } + void set_Direction_Y(llvm::Value *val) { Instr->setOperand(12, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(13); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(13, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(14); } + void set_TMax(llvm::Value *val) { Instr->setOperand(14, val); } + llvm::Value *get_payload() const { return Instr->getOperand(15); } + void set_payload(llvm::Value *val) { Instr->setOperand(15, val); } +}; + /// This instruction Creates a new HitObject representing a committed hit from a /// RayQuery struct DxilInst_HitObject_FromRayQuery { @@ -8987,6 +9073,37 @@ struct DxilInst_HitObject_MakeNop { bool requiresUniformInputs() const { return false; } }; +/// This instruction Represents the invocation of the CH/MS shader represented +/// by the HitObject +struct DxilInst_HitObject_Invoke { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Invoke(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_Invoke); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != 
llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_payload = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_payload() const { return Instr->getOperand(2); } + void set_payload(llvm::Value *val) { Instr->setOperand(2, val); } +}; + /// This instruction Returns `true` if the HitObject represents a miss struct DxilInst_HitObject_IsMiss { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index d9276fc7d6..b837d6e65d 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2303,16 +2303,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - {OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - Attribute::None, - 0, - {}, - {}}, // Overloads: v // Shader Execution Reordering + {OC::HitObject_TraceRay, + "HitObject_TraceRay", + OCC::HitObject_TraceRay, + "hitObject_TraceRay", + Attribute::None, + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u {OC::HitObject_FromRayQuery, "HitObject_FromRayQuery", OCC::HitObject_FromRayQuery, @@ -2345,15 +2345,15 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 0, {}, {}}, // Overloads: v - - {OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", + {OC::HitObject_Invoke, + "HitObject_Invoke", + OCC::HitObject_Invoke, + "hitObject_Invoke", Attribute::None, - 0, - {}, - {}}, // Overloads: v + 1, + {{0x100}}, + {{0x0}}}, // Overloads: u + {OC::ReservedB6, "ReservedB6", OCC::Reserved, @@ -3449,19 +3449,20 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } - // Instructions: HitObject_FromRayQuery=263, + // Instructions: HitObject_TraceRay=262, HitObject_FromRayQuery=263, // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, - // HitObject_MakeNop=266, HitObject_IsMiss=269, HitObject_IsHit=270, - // HitObject_IsNop=271, HitObject_RayFlags=272, HitObject_RayTMin=273, - // HitObject_RayTCurrent=274, HitObject_WorldRayOrigin=275, - // HitObject_WorldRayDirection=276, HitObject_ObjectRayOrigin=277, - // HitObject_ObjectRayDirection=278, HitObject_ObjectToWorld3x4=279, - // HitObject_WorldToObject3x4=280, HitObject_GeometryIndex=281, - // HitObject_InstanceIndex=282, HitObject_InstanceID=283, - // HitObject_PrimitiveIndex=284, HitObject_HitKind=285, - // HitObject_ShaderTableIndex=286, HitObject_SetShaderTableIndex=287, + // HitObject_MakeNop=266, HitObject_Invoke=267, HitObject_IsMiss=269, + // HitObject_IsHit=270, HitObject_IsNop=271, HitObject_RayFlags=272, + // HitObject_RayTMin=273, HitObject_RayTCurrent=274, + // HitObject_WorldRayOrigin=275, HitObject_WorldRayDirection=276, + // HitObject_ObjectRayOrigin=277, HitObject_ObjectRayDirection=278, + // HitObject_ObjectToWorld3x4=279, HitObject_WorldToObject3x4=280, + // HitObject_GeometryIndex=281, HitObject_InstanceIndex=282, + // HitObject_InstanceID=283, HitObject_PrimitiveIndex=284, + // HitObject_HitKind=285, HitObject_ShaderTableIndex=286, + // HitObject_SetShaderTableIndex=287, // HitObject_LoadLocalRootTableConstant=288, HitObject_Attributes=289 - if ((263 <= op && op <= 266) || (269 <= op && op <= 289)) { + if ((262 <= op && op <= 267) || (269 <= op && op <= 289)) { major = 6; minor = 9; 
mask = @@ -5632,12 +5633,27 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB0: - A(pV); - A(pI32); - break; // Shader Execution Reordering + case OpCode::HitObject_TraceRay: + A(pHit); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(udt); + break; case OpCode::HitObject_FromRayQuery: A(pHit); A(pI32); @@ -5668,12 +5684,14 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pHit); A(pI32); break; - - // - case OpCode::ReservedB5: + case OpCode::HitObject_Invoke: A(pV); A(pI32); + A(pHit); + A(udt); break; + + // case OpCode::ReservedB6: A(pV); A(pI32); @@ -6007,6 +6025,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TempRegStore: case OpCode::CallShader: case OpCode::Pack4x8: + case OpCode::HitObject_Invoke: case OpCode::HitObject_Attributes: if (FT->getNumParams() <= 2) return nullptr; @@ -6048,6 +6067,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return nullptr; return FT->getParamType(5); case OpCode::TraceRay: + case OpCode::HitObject_TraceRay: if (FT->getNumParams() <= 15) return nullptr; return FT->getParamType(15); @@ -6135,11 +6155,9 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::ReservedA0: case OpCode::ReservedA1: case OpCode::ReservedA2: - case OpCode::ReservedB0: case OpCode::HitObject_FromRayQuery: case OpCode::HitObject_MakeMiss: case OpCode::HitObject_MakeNop: - case OpCode::ReservedB5: case OpCode::ReservedB6: case OpCode::HitObject_SetShaderTableIndex: case OpCode::HitObject_LoadLocalRootTableConstant: diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index bd96d83965..1dddb6c0e6 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -905,13 +905,14 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { case DXIL::OpCode::BufferUpdateCounter: readWrite = ShaderAccessFlags::Counter; break; + case DXIL::OpCode::HitObject_TraceRay: case DXIL::OpCode::TraceRay: { // Read of AccelerationStructure; doesn't match function attribute - auto res = GetResourceFromHandle(Call->getArgOperand(1), DM); - if (res.accessStyle == AccessStyle::None) { + auto Res = GetResourceFromHandle(Call->getArgOperand(1), DM); + if (Res.accessStyle == AccessStyle::None) { continue; } - if (EmitResourceAccess(DM, res, Call, HlslOP, Ctx, + if (EmitResourceAccess(DM, Res, Call, HlslOP, Ctx, ShaderAccessFlags::Read)) { Modified = true; } diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index aa7bb398fa..5ec72e0267 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1006,6 +1006,15 @@ static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, return true; } +static void ValidateASHandle(CallInst *CI, Value *Hdl, + ValidationContext &ValCtx) { + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); + if (RP.getResourceClass() == DXIL::ResourceClass::Invalid || + RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); + } +} + static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { switch (Opcode) { @@ -1587,14 +1596,12 @@ static void 
ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, case DXIL::OpCode::TraceRay: { DxilInst_TraceRay TraceRay(CI); Value *Hdl = TraceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); - if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - return; - } - if (RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - } + ValidateASHandle(CI, Hdl, ValCtx); + } break; + case DXIL::OpCode::HitObject_TraceRay: { + DxilInst_HitObject_TraceRay HOTraceRay(CI); + Value *Hdl = HOTraceRay.get_accelerationStructure(); + ValidateASHandle(CI, Hdl, ValCtx); } break; default: break; diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll new file mode 100644 index 0000000000..f3b99300be --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll @@ -0,0 +1,68 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.Payload, align 4 + %3 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + %5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %5, %struct.Payload* nonnull %2) ; HitObject_Invoke(hitObject,payload) + ret void +} + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind +declare void @dx.op.hitObject_Invoke.struct.Payload(i32, %dx.types.HitObject, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, 
%dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.dxrPayloadAnnotations = !{!4} +!dx.entryPoints = !{!5, !6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!7, null, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !8} +!4 = !{i32 0, %struct.Payload undef, !9} +!5 = !{null, !"", null, !2, null} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !10} +!7 = !{!11} +!8 = !{!12} +!9 = !{!13} +!10 = !{i32 8, i32 7, i32 5, !14} +!11 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !15} +!12 = !{i32 1, !16, !16} +!13 = !{i32 0, i32 8210} +!14 = !{i32 0} +!15 = !{i32 0, i32 4} +!16 = !{} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 28695a4036..595bad7c1b 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -850,6 +850,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 8 for i in ( "HitObject_MakeMiss,HitObject_MakeNop" + + ",HitObject_TraceRay,HitObject_Invoke" + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + ",HitObject_IsMiss,HitObject_IsHit,HitObject_IsNop" + ",HitObject_RayFlags,HitObject_RayTMin,HitObject_RayTCurrent,HitObject_GeometryIndex,HitObject_InstanceIndex,HitObject_InstanceID,HitObject_PrimitiveIndex,HitObject_HitKind,HitObject_ShaderTableIndex" @@ -5747,7 +5748,68 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1) + self.add_dxil_op( + "HitObject_TraceRay", + next_op_idx, + "HitObject_TraceRay", + "Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject", + "u", + "", + [ + db_dxil_param(0, "hit_object", "", "Resulting HitObject"), + db_dxil_param( + 2, + "res", + "accelerationStructure", + "Top-level acceleration structure to use", + ), + db_dxil_param( + 3, + "i32", + "rayFlags", + "Valid combination of Ray_flags", + ), + db_dxil_param( + 4, + "i32", + "instanceInclusionMask", + "Bottom 8 bits of InstanceInclusionMask are used to include/reject geometry instances based on the InstanceMask in each instance: if(!((InstanceInclusionMask & InstanceMask) & 0xff)) { ignore intersection }", + ), + db_dxil_param( + 5, + "i32", + "rayContributionToHitGroupIndex", + "Offset to add into Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 6, + "i32", + "multiplierForGeometryContributionToHitGroupIndex", + "Stride to multiply by per-geometry GeometryContributionToHitGroupIndex in Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 7, + "i32", + "missShaderIndex", + "Miss shader index in Addressing calculations within shader tables. 
Only the bottom 16 bits of this value are used", + ), + db_dxil_param(8, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(9, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(10, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(11, "f", "TMin", "Tmin of the ray"), + db_dxil_param(12, "f", "Direction_X", "Direction x of the ray"), + db_dxil_param(13, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(14, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(15, "f", "TMax", "Tmax of the ray"), + db_dxil_param( + 16, + "udt", + "payload", + "User-defined payload structure", + ), + ], + ) + next_op_idx += 1 self.add_dxil_op( "HitObject_FromRayQuery", @@ -5822,7 +5884,27 @@ def UFI(name, **mappings): ) next_op_idx += 1 - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 2, 5) + self.add_dxil_op( + "HitObject_Invoke", + next_op_idx, + "HitObject_Invoke", + "Represents the invocation of the CH/MS shader represented by the HitObject", + "u", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "udt", + "payload", + "User-defined payload structure", + ), + ], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1, 6) self.add_dxil_op( "HitObject_IsMiss", From b5a9cd59df273cd684b5db82acec88e90f87893b Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Mon, 14 Apr 2025 23:04:15 +0200 Subject: [PATCH 81/88] [SER] MaybeReorderThread DXIL opcode and validation (#7256) - DXIL opcodes for MaybeReorderThread - Validator rules - DXV validation test (passing & expected failures) Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 5 +- include/dxc/DXIL/DxilInstructions.h | 37 ++++++++++++ lib/DXIL/DxilOperations.cpp | 29 +++++---- lib/DxilValidation/DxilValidation.cpp | 24 ++++++++ .../ser_maybereorder_failing.ll | 60 +++++++++++++++++++ .../ser_maybereorder_passing.ll | 46 ++++++++++++++ utils/hct/hctdb.py | 38 +++++++++++- 7 files changed, 223 insertions(+), 16 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll create mode 100644 tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index e002779d09..8c73328fbd 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -505,7 +505,6 @@ enum class OpCode : unsigned { ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved ReservedB30 = 292, // reserved - ReservedB6 = 268, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved ReservedC2 = 295, // reserved @@ -931,6 +930,7 @@ enum class OpCode : unsigned { HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space HitObject_WorldToObject3x4 = 280, // Returns the world to object space // transformation matrix in 3x4 form + MaybeReorderThread = 268, // Reorders the current thread // Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands @@ -1319,6 +1319,7 @@ enum class OpCodeClass : unsigned { HitObject_StateScalar, HitObject_StateVector, HitObject_TraceRay, + MaybeReorderThread, // Synchronization AtomicBinOp, @@ -1384,7 +1385,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 189 // exclusive last value of enumeration + NumOpClasses = 190 // 
exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index e39f754c68..a99c5360d4 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -9104,6 +9104,43 @@ struct DxilInst_HitObject_Invoke { void set_payload(llvm::Value *val) { Instr->setOperand(2, val); } }; +/// This instruction Reorders the current thread +struct DxilInst_MaybeReorderThread { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MaybeReorderThread(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::MaybeReorderThread); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_coherenceHint = 2, + arg_numCoherenceHintBitsFromLSB = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_coherenceHint() const { return Instr->getOperand(2); } + void set_coherenceHint(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_numCoherenceHintBitsFromLSB() const { + return Instr->getOperand(3); + } + void set_numCoherenceHintBitsFromLSB(llvm::Value *val) { + Instr->setOperand(3, val); + } +}; + /// This instruction Returns `true` if the HitObject represents a miss struct DxilInst_HitObject_IsMiss { llvm::Instruction *Instr; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b837d6e65d..f614ba9d14 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -2353,17 +2353,14 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { 1, {{0x100}}, {{0x0}}}, // Overloads: u - - {OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", + {OC::MaybeReorderThread, + "MaybeReorderThread", + OCC::MaybeReorderThread, + "maybeReorderThread", Attribute::None, 0, {}, {}}, // Overloads: v - - // Shader Execution Reordering {OC::HitObject_IsMiss, "HitObject_IsMiss", OCC::HitObject_StateScalar, @@ -3449,6 +3446,13 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, minor = 9; return; } + // Instructions: MaybeReorderThread=268 + if (op == 268) { + major = 6; + minor = 9; + mask = SFLAG(Library) | SFLAG(RayGeneration); + return; + } // Instructions: HitObject_TraceRay=262, HitObject_FromRayQuery=263, // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, // HitObject_MakeNop=266, HitObject_Invoke=267, HitObject_IsMiss=269, @@ -5690,14 +5694,13 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pHit); A(udt); break; - - // - case OpCode::ReservedB6: + case OpCode::MaybeReorderThread: A(pV); A(pI32); + A(pHit); + A(pI32); + A(pI32); break; - - // Shader Execution Reordering case OpCode::HitObject_IsMiss: A(pI1); A(pI32); @@ -6158,7 +6161,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::HitObject_FromRayQuery: case OpCode::HitObject_MakeMiss: case OpCode::HitObject_MakeNop: - case OpCode::ReservedB6: + case OpCode::MaybeReorderThread: case OpCode::HitObject_SetShaderTableIndex: case OpCode::HitObject_LoadLocalRootTableConstant: case 
OpCode::ReservedB28: diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 5ec72e0267..00a6b9ae14 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1886,6 +1886,30 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, {"CreateHandleForLib", "Library"}); } break; + + // Shader Execution Reordering + case DXIL::OpCode::MaybeReorderThread: { + Value *HitObject = CI->getArgOperand(1); + Value *CoherenceHintBits = CI->getArgOperand(2); + Value *NumCoherenceHintBits = CI->getArgOperand(3); + + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + + if (isa(NumCoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + + ConstantInt *NumCoherenceHintBitsConst = + dyn_cast(NumCoherenceHintBits); + const bool HasCoherenceHint = + NumCoherenceHintBitsConst && + NumCoherenceHintBitsConst->getLimitedValue() != 0; + if (HasCoherenceHint && isa(CoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + } break; + case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll new file mode 100644 index 0000000000..4502b9241d --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll @@ -0,0 +1,60 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef)' + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1)' + +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0)' + +; CHECK: Validation failed. + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ; Validate that hit object is not undef. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that coherence hint is not undef while numCoherenceHintBitsFromLSB is not 0. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that num coherence hint bits from LSB is not undef. 
+ call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll new file mode 100644 index 0000000000..8ee7677bd4 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll @@ -0,0 +1,46 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Coherence hint disabled, accept 'undef' coherence hint bits. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 595bad7c1b..9b2f33727a 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -866,6 +866,13 @@ def populate_categories_and_models(self): "closesthit", "miss", ) + for i in ("MaybeReorderThread").split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + ) def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. 
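At the source level, the hitObject/coherenceHint/numCoherenceHintBitsFromLSB operands defined earlier in this patch correspond to the MaybeReorderThread overloads described in the linked proposal 0027. A minimal raygeneration-stage sketch, assuming the dx::MaybeReorderThread and dx::HitObject::MakeNop spellings from that proposal (the hit object and hint values are purely illustrative):

    dx::HitObject Hit = dx::HitObject::MakeNop();
    uint CoherenceHint = 3; // illustrative value

    // Reorder on the hit object alone.
    dx::MaybeReorderThread(Hit);

    // Reorder on the hit object plus a user coherence hint. The last argument
    // gives the number of meaningful low bits of the hint, so it must not be
    // undef, and the hint itself must be defined whenever that count is
    // nonzero; these are exactly the two cases the new validation rules reject.
    dx::MaybeReorderThread(Hit, CoherenceHint, 2);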
@@ -5904,7 +5911,26 @@ def UFI(name, **mappings): ) next_op_idx += 1 - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 1, 6) + self.add_dxil_op( + "MaybeReorderThread", + next_op_idx, + "MaybeReorderThread", + "Reorders the current thread", + "v", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "coherenceHint", "Coherence hint"), + db_dxil_param( + 4, + "i32", + "numCoherenceHintBitsFromLSB", + "Num coherence hint bits from LSB", + ), + ], + ) + next_op_idx += 1 self.add_dxil_op( "HitObject_IsMiss", @@ -8267,6 +8293,16 @@ def build_valrules(self): "Invalid use of completed record handle.", ) + # Shader Execution Reordering + self.add_valrule( + "Instr.UndefHitObject", + "HitObject is undef.", + ) + self.add_valrule( + "Instr.MayReorderThreadUndefCoherenceHintParam", + "Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.", + ) + # Some legacy rules: # - space is only supported for shader targets 5.1 and higher # - multiple rules regarding derivatives, which isn't a supported feature for DXIL From 47e11af022d4ed41ac87348f822ea8804b55523a Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Mon, 14 Apr 2025 15:20:13 -0600 Subject: [PATCH 82/88] [spirv] Handles rvalue as implicit object argument of vk::BufferPointer::Get(). (#7313) [spirv] Handles rvalue as implicit object argument of vk::BufferPointer::Get(). Fixes #7302. --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 6 ++++ .../vk.buffer-pointer.rvalue.hlsl | 35 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index eed4f6369f..cd5f860555 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -10932,6 +10932,12 @@ SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( SpirvInstruction *bufferPointer = doExpr(obj); if (!bufferPointer) return nullptr; + if (bufferPointer->isRValue()) { + bufferPointer->setRValue(false); + bufferPointer->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); + return bufferPointer; + } + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); lowerTypeVisitor.visitInstruction(bufferPointer); diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl new file mode 100644 index 0000000000..930770cc16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl @@ -0,0 +1,35 @@ +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s + +// Issue #7302: implicit object argument of Get() evaluates to rvalue + +template +[[vk::ext_instruction(/*spv::OpBitcast*/124)]] +T bitcast(U); + +struct Content +{ + int a; +}; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[IO:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[UDEADBEEF:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 3735928559 +// CHECK-DAG: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[V2UINT:%[_0-9A-Za-z]*]] = OpTypeVector [[UINT]] 2 +// CHECK: [[VECTOR:%[_0-9A-Za-z]*]] = OpConstantComposite [[V2UINT]] [[UDEADBEEF]] [[U0]] +// CHECK: [[CONTENT:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] +// CHECK: [[PPCONTENT:%[_0-9A-Za-z]*]] = 
OpTypePointer PhysicalStorageBuffer [[CONTENT]] +// CHECK: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] + +[numthreads(1, 1, 1)] +void main() +{ + bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get().a = 1; +} + +// CHECK: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// CHECK: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] +// CHECK: OpStore [[PTR]] [[I1]] Aligned 4 + From 30a757960b6d8ff792a59638ed826606e5675409 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Tue, 15 Apr 2025 09:29:20 -0600 Subject: [PATCH 83/88] [spirv] Fixes vk::BufferPointer constructor expression construction. (#7331) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Constructors are now properly attached to the template class declaration instead of a specialization. Closes #6489 (again). --------- Co-authored-by: Nathan Gauër --- tools/clang/lib/AST/ASTContextHLSL.cpp | 16 +++-- tools/clang/lib/Sema/SemaExprCXX.cpp | 63 +++++++++++++------ .../vk.buffer-pointer.from-uint.hlsl | 46 ++++++++++++++ 3 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 2c3c20546f..0a688c03fa 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -1390,19 +1390,27 @@ CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, DeclarationName(&context.Idents.get("Get")), true); CanQualType canQualType = recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); - CreateConstructorDeclarationWithParams( + auto *copyConstructorDecl = CreateConstructorDeclarationWithParams( context, recordDecl, context.VoidTy, {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, - context.DeclarationNames.getCXXConstructorName(canQualType), false); - CreateConstructorDeclarationWithParams( + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + auto *addressConstructorDecl = CreateConstructorDeclarationWithParams( context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, - context.DeclarationNames.getCXXConstructorName(canQualType), false); + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, copyConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, addressConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); methodDecl->addAttr( HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + copyConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + addressConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); return Builder.completeDefinition(); } diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index 4723bc93e9..5113c56205 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -1057,26 +1057,51 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, Expr *Arg = Exprs[0]; #ifdef ENABLE_SPIRV_CODEGEN if 
(hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { - for (auto *ctor : Ty->getAsCXXRecordDecl()->ctors()) { - if (auto *functionType = ctor->getType()->getAs()) { - if (functionType->getNumParams() != 1 || - !functionType->getParamType(0)->isIntegerType()) - continue; - - CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); - if (!Arg->isRValue()) { - Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); - } - if (argType != Context.UnsignedLongLongTy) { - Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, - CK_IntegralCast) - .get(); - } - return CXXConstructExpr::Create( - Context, Ty, TyBeginLoc, ctor, false, {Arg}, false, false, false, - false, CXXConstructExpr::ConstructionKind::CK_Complete, - SourceRange(LParenLoc, RParenLoc)); + typedef DeclContext::specific_decl_iterator ft_iter; + auto *recordDecl = Ty->getAsCXXRecordDecl(); + auto *specDecl = cast(recordDecl); + auto *templatedDecl = + specDecl->getSpecializedTemplate()->getTemplatedDecl(); + auto functionTemplateDecls = + llvm::iterator_range(ft_iter(templatedDecl->decls_begin()), + ft_iter(templatedDecl->decls_end())); + for (auto *ftd : functionTemplateDecls) { + auto *fd = ftd->getTemplatedDecl(); + if (fd->getNumParams() != 1 || + !fd->getParamDecl(0)->getType()->isIntegerType()) + continue; + + void *insertPos; + auto templateArgs = ftd->getInjectedTemplateArgs(); + auto *functionDecl = ftd->findSpecialization(templateArgs, insertPos); + if (!functionDecl) { + DeclarationNameInfo DInfo(ftd->getDeclName(), + recordDecl->getLocation()); + auto *templateArgList = TemplateArgumentList::CreateCopy( + Context, templateArgs.data(), templateArgs.size()); + functionDecl = CXXConstructorDecl::Create( + Context, recordDecl, Arg->getLocStart(), DInfo, Ty, TInfo, false, + false, false, false); + functionDecl->setFunctionTemplateSpecialization(ftd, templateArgList, + insertPos); + } else if (functionDecl->getDeclKind() != Decl::Kind::CXXConstructor) { + continue; + } + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, cast(functionDecl), + false, {Arg}, false, false, false, false, + CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); } } #endif diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl new file mode 100644 index 0000000000..b44e1eca09 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -spirv -Od -T cs_6_7 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DALIGN_16 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DNO_PC %s | FileCheck %s + +// Was getting bogus type errors with the defined changes + +#ifdef ALIGN_16 +typedef vk::BufferPointer BufferType; +#else +typedef vk::BufferPointer BufferType; +#endif +#ifndef NO_PC +struct PushConstantStruct { + BufferType push_buffer; +}; +[[vk::push_constant]] PushConstantStruct push_constant; +#endif + +RWStructuredBuffer output; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant 
[[UINT]] 0 +// CHECK: [[PPUINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[PFPPUINT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[PPUINT]] +// CHECK: [[PUUINT:%[_0-9A-Za-z]*]] = OpTypePointer Uniform [[UINT]] +// CHECK: [[OUTPUT:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} Uniform + +[numthreads(1, 1, 1)] +void main() { + uint64_t addr = 123; + vk::BufferPointer test = vk::BufferPointer(addr); + output[0] = test.Get(); +} + +// CHECK: [[TEST:%[_0-9A-Za-z]*]] = OpVariable [[PFPPUINT]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PPUINT]] +// CHECK: OpStore [[TEST]] [[X1]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] Aligned 32 +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 4 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PUUINT]] [[OUTPUT]] [[I0]] [[U0]] +// CHECK: OpStore [[X4]] [[X3]] +// CHECK: OpReturn +// CHECK: OpFunctionEnd + From ea3d8466d807fccbee1a3dc16d4b15bafd12d4fe Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 15 Apr 2025 22:30:11 +0200 Subject: [PATCH 84/88] [SER] Declare all SER HLSL intrinsics (#7347) Simplify merging the SER lowering PRs by declaring all missing SER HLSL intrinsics up front. This reserves stable HLSL opcodes similar to what was done for the DXIL opcodes before. Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md DXC SER implementation tracker: #7214 --- include/dxc/HlslIntrinsicOp.h | 29 +++++- include/dxc/dxcapi.internal.h | 7 +- lib/HLSL/HLOperationLower.cpp | 128 ++++++++++++++++++++++++++ tools/clang/lib/Sema/SemaHLSL.cpp | 10 +- utils/hct/gen_intrin_main.txt | 28 ++++++ utils/hct/hctdb.py | 1 + utils/hct/hlsl_intrinsic_opcodes.json | 31 ++++++- 7 files changed, 225 insertions(+), 9 deletions(-) diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 68b88822e8..d37c27a38e 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -336,7 +336,34 @@ enum class IntrinsicOp { MOP_TraceRayInline = 325, MOP_WorldRayDirection = 326, MOP_WorldRayOrigin = 327, + MOP_DxHitObject_FromRayQuery = 363, + MOP_DxHitObject_GetAttributes = 364, + MOP_DxHitObject_GetGeometryIndex = 365, + MOP_DxHitObject_GetHitKind = 366, + MOP_DxHitObject_GetInstanceID = 367, + MOP_DxHitObject_GetInstanceIndex = 368, + MOP_DxHitObject_GetObjectRayDirection = 369, + MOP_DxHitObject_GetObjectRayOrigin = 370, + MOP_DxHitObject_GetObjectToWorld3x4 = 371, + MOP_DxHitObject_GetObjectToWorld4x3 = 372, + MOP_DxHitObject_GetPrimitiveIndex = 373, + MOP_DxHitObject_GetRayFlags = 374, + MOP_DxHitObject_GetRayTCurrent = 375, + MOP_DxHitObject_GetRayTMin = 376, + MOP_DxHitObject_GetShaderTableIndex = 377, + MOP_DxHitObject_GetWorldRayDirection = 378, + MOP_DxHitObject_GetWorldRayOrigin = 379, + MOP_DxHitObject_GetWorldToObject3x4 = 380, + MOP_DxHitObject_GetWorldToObject4x3 = 381, + MOP_DxHitObject_Invoke = 382, + MOP_DxHitObject_IsHit = 383, + MOP_DxHitObject_IsMiss = 384, + MOP_DxHitObject_IsNop = 385, + MOP_DxHitObject_LoadLocalRootTableConstant = 386, + MOP_DxHitObject_MakeMiss = 387, MOP_DxHitObject_MakeNop = 358, + MOP_DxHitObject_SetShaderTableIndex = 388, + MOP_DxHitObject_TraceRay = 389, IOP_DxMaybeReorderThread = 359, MOP_Count = 328, MOP_FinishedCrossGroupSharing = 329, @@ -369,7 +396,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 363, + Num_Intrinsics = 390, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp 
opcode) { switch (opcode) { diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index d37054194b..28bd3e7066 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -131,12 +131,13 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS = 50, LICOMPTYPE_HIT_OBJECT = 51, + LICOMPTYPE_RAY_QUERY = 52, #ifdef ENABLE_SPIRV_CODEGEN - LICOMPTYPE_VK_BUFFER_POINTER = 52, - LICOMPTYPE_COUNT = 53 + LICOMPTYPE_VK_BUFFER_POINTER = 53, + LICOMPTYPE_COUNT = 54 #else - LICOMPTYPE_COUNT = 52 + LICOMPTYPE_COUNT = 53 #endif }; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index c0f9d7fddd..b5114fa34b 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6197,6 +6197,77 @@ Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, bool &Translated) { return nullptr; // TODO: Merge SER DXIL patches } + +Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return nullptr; // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectLoadLocalRootTableConstant( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectSetShaderTableIndex( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + } // namespace // Resource Handle. 
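Because each of these lowerings is still a stub, what this patch actually lands is the HLSL-level surface declared in gen_intrin_main.txt further below. A minimal sketch of that surface, assuming the dx::HitObject spelling used by the linked SER proposal; the payload struct, acceleration structure binding, and ray values are illustrative, and these calls will not produce final DXIL until the SER lowering patches are merged:

    struct MyPayload { float4 color; };
    RaytracingAccelerationStructure Scene : register(t0);

    [shader("raygeneration")]
    void Main() {
      RayDesc Ray = { float3(0, 0, 0), 0.0f, float3(0, 0, 1), 1000.0f };
      MyPayload Payload = (MyPayload)0;

      // Trace without invoking closesthit/miss, inspect the hit, then invoke.
      dx::HitObject Hit = dx::HitObject::TraceRay(
          Scene, RAY_FLAG_NONE, 0xFF, 0, 1, 0, Ray, Payload);
      if (!Hit.IsMiss())
        Hit.SetShaderTableIndex(0); // illustrative record index
      dx::HitObject::Invoke(Hit, Payload);
    }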
@@ -6908,6 +6979,63 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery, + DXIL::OpCode::HitObject_FromRayQuery}, + {IntrinsicOp::MOP_DxHitObject_GetAttributes, + TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes}, + {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex}, + {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_HitKind}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_InstanceID}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection, + TranslateHitObjectVectorGetter, + DXIL::OpCode::HitObject_ObjectRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex}, + {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayFlags}, + {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent}, + {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayTMin}, + {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke, + DXIL::OpCode::HitObject_Invoke}, + {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsHit}, + {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsMiss}, + {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsNop}, + {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, + TranslateHitObjectLoadLocalRootTableConstant, + DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, + {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMake, + DXIL::OpCode::HitObject_MakeMiss}, + {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, + TranslateHitObjectSetShaderTableIndex, + DXIL::OpCode::HitObject_SetShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, + DXIL::OpCode::HitObject_TraceRay}, }; } // namespace static_assert( diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp 
b/tools/clang/lib/Sema/SemaHLSL.cpp index 5236a1e3c4..230c7e65d9 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -580,9 +580,9 @@ const UINT g_uBasicKindProps[] = { 0, // AR_OBJECT_PROCEDURAL_PRIMITIVE_HIT_GROUP, 0, // AR_OBJECT_RAYTRACING_PIPELINE_CONFIG1, - BPROP_OBJECT, // AR_OBJECT_RAY_QUERY, - BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, - BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, + LICOMPTYPE_RAY_QUERY, // AR_OBJECT_RAY_QUERY, + BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, + BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, BPROP_OBJECT | BPROP_RWBUFFER | BPROP_TEXTURE, // AR_OBJECT_RWTEXTURE2DMS BPROP_OBJECT | BPROP_RWBUFFER | @@ -1135,6 +1135,9 @@ static const ArBasicKind g_ResourceCT[] = {AR_OBJECT_HEAP_RESOURCE, static const ArBasicKind g_RayDescCT[] = {AR_OBJECT_RAY_DESC, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_RayQueryCT[] = {AR_OBJECT_RAY_QUERY, + AR_BASIC_UNKNOWN}; + static const ArBasicKind g_AccelerationStructCT[] = { AR_OBJECT_ACCELERATION_STRUCT, AR_BASIC_UNKNOWN}; @@ -1297,6 +1300,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT + g_RayQueryCT, // LICOMPTYPE_RAY_QUERY #ifdef ENABLE_SPIRV_CODEGEN g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER #endif diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 55c3643d95..f1274fd308 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1101,6 +1101,34 @@ uint [[ro]] CommittedInstanceContributionToHitGroupIndex(); // Shader Execution Reordering namespace DxHitObjectMethods { DxHitObject [[static,class_prefix,min_sm=6.9]] MakeNop(); + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeMiss(in uint RayFlags, in uint MissShaderIndex, in ray_desc Ray); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq, in uint HitKind, in udt Attributes); + DxHitObject [[static,class_prefix,min_sm=6.9]] TraceRay(in acceleration_struct AccelerationStructure, in uint RayFlags, in uint InstanceInclusionMask, in uint RayContributionToHitGroupIndex, in uint MultiplierForGeometryContributionToHitGroupIndex, in uint MissShaderIndex, in ray_desc Ray, inout udt Payload); + void [[static,class_prefix,min_sm=6.9]] Invoke(in DxHitObject ho, inout udt Payload); + bool [[rn,class_prefix,min_sm=6.9]] IsMiss(); + bool [[rn,class_prefix,min_sm=6.9]] IsHit(); + bool [[rn,class_prefix,min_sm=6.9]] IsNop(); + uint [[rn,class_prefix,min_sm=6.9]] GetRayFlags(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTMin(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTCurrent(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayDirection(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayDirection(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld4x3(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject4x3(); + uint [[rn,class_prefix,min_sm=6.9]] GetGeometryIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceID(); + uint [[rn,class_prefix,min_sm=6.9]] 
GetPrimitiveIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetHitKind(); + uint [[rn,class_prefix,min_sm=6.9]] GetShaderTableIndex(); + $funcT [[class_prefix,min_sm=6.9]] GetAttributes(); + void [[class_prefix,min_sm=6.9]] SetShaderTableIndex(in uint RecordIndex); + uint [[ro,class_prefix,min_sm=6.9]] LoadLocalRootTableConstant(in uint RootConstantOffsetInBytes); } namespace namespace DxIntrinsics { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 9b2f33727a..6344fb5849 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -9183,6 +9183,7 @@ def __init__(self, intrinsic_defs, opcode_data): "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", "DxHitObject": "LICOMPTYPE_HIT_OBJECT", "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", + "RayQuery": "LICOMPTYPE_RAY_QUERY", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json index c4527277cd..d99b84b745 100644 --- a/utils/hct/hlsl_intrinsic_opcodes.json +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -1,6 +1,6 @@ { "IntrinsicOpCodes": { - "Num_Intrinsics": 363, + "Num_Intrinsics": 390, "IOP_AcceptHitAndEndSearch": 0, "IOP_AddUint64": 1, "IOP_AllMemoryBarrier": 2, @@ -363,6 +363,33 @@ "IOP_DxMaybeReorderThread": 359, "IOP_Vkreinterpret_pointer_cast": 360, "IOP_Vkstatic_pointer_cast": 361, - "MOP_GetBufferContents": 362 + "MOP_GetBufferContents": 362, + "MOP_DxHitObject_FromRayQuery": 363, + "MOP_DxHitObject_GetAttributes": 364, + "MOP_DxHitObject_GetGeometryIndex": 365, + "MOP_DxHitObject_GetHitKind": 366, + "MOP_DxHitObject_GetInstanceID": 367, + "MOP_DxHitObject_GetInstanceIndex": 368, + "MOP_DxHitObject_GetObjectRayDirection": 369, + "MOP_DxHitObject_GetObjectRayOrigin": 370, + "MOP_DxHitObject_GetObjectToWorld3x4": 371, + "MOP_DxHitObject_GetObjectToWorld4x3": 372, + "MOP_DxHitObject_GetPrimitiveIndex": 373, + "MOP_DxHitObject_GetRayFlags": 374, + "MOP_DxHitObject_GetRayTCurrent": 375, + "MOP_DxHitObject_GetRayTMin": 376, + "MOP_DxHitObject_GetShaderTableIndex": 377, + "MOP_DxHitObject_GetWorldRayDirection": 378, + "MOP_DxHitObject_GetWorldRayOrigin": 379, + "MOP_DxHitObject_GetWorldToObject3x4": 380, + "MOP_DxHitObject_GetWorldToObject4x3": 381, + "MOP_DxHitObject_Invoke": 382, + "MOP_DxHitObject_IsHit": 383, + "MOP_DxHitObject_IsMiss": 384, + "MOP_DxHitObject_IsNop": 385, + "MOP_DxHitObject_LoadLocalRootTableConstant": 386, + "MOP_DxHitObject_MakeMiss": 387, + "MOP_DxHitObject_SetShaderTableIndex": 388, + "MOP_DxHitObject_TraceRay": 389 } } From 5f18e2bac0833412ca07637a98d445a84f7d30e2 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 15 Apr 2025 15:43:10 -0700 Subject: [PATCH 85/88] Add HctGen of DXIL.rst back to build without LLVM_BUILD_DOCS required (#7346) HctGen of DXIL.rst should happen on every ordinary build, and be updated with other HctGen modified files. This isn't about building the doc, it's about updating it when definitions change in hctdb.py. We've been missing updates to DXIL.rst for quite a while due to this issue, introduced [here](https://github.com/microsoft/DirectXShaderCompiler/pull/6715/files#diff-1e7de1ae2d059d21e1dd75d5812d5a34b0222cef273b7c3a2af62eb747f9d20aR768-R770). This also brings DXIL.rst up to date. 
--- CMakeLists.txt | 4 +- docs/DXIL.rst | 698 +++++++++++++++++++++++++++---------------------- 2 files changed, 380 insertions(+), 322 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0977fa1246..5210718005 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -762,9 +762,7 @@ if (LLVM_INCLUDE_DOCS) add_subdirectory(docs) endif() -if (LLVM_BUILD_DOCS) - add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change -endif() +add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change add_subdirectory(cmake/modules) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index a68e31d0a9..a1c5055085 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -1984,54 +1984,57 @@ The following LLVM instructions are valid in a DXIL program, with the specified .. hctdb_instrhelp.get_instrs_rst() .. INSTR-RST:BEGIN -============= ======================================================================= ================= -Instruction Action Operand overloads -============= ======================================================================= ================= -Ret returns a value (possibly void), from a function. vhfd1wil -Br branches (conditional or unconditional) -Switch performs a multiway switch -Add returns the sum of its two operands wil -FAdd returns the sum of its two operands hfd -Sub returns the difference of its two operands wil -FSub returns the difference of its two operands hfd -Mul returns the product of its two operands wil -FMul returns the product of its two operands hfd -UDiv returns the quotient of its two unsigned operands wil -SDiv returns the quotient of its two signed operands wil -FDiv returns the quotient of its two operands hfd -URem returns the remainder from the unsigned division of its two operands wil -SRem returns the remainder from the signed division of its two operands wil -FRem returns the remainder from the division of its two operands hfd -Shl shifts left (logical) wil -LShr shifts right (logical), with zero bit fill wil -AShr shifts right (arithmetic), with 'a' operand sign bit fill wil -And returns a bitwise logical and of its two operands 1wil -Or returns a bitwise logical or of its two operands 1wil -Xor returns a bitwise logical xor of its two operands 1wil -Alloca allocates memory on the stack frame of the currently executing function -Load reads from memory -Store writes to memory -GetElementPtr gets the address of a subelement of an aggregate value -AtomicCmpXchg atomically modifies memory -AtomicRMW atomically modifies memory -Trunc truncates an integer 1wil -ZExt zero extends an integer 1wil -SExt sign extends an integer 1wil -FPToUI converts a floating point to UInt hfd1wil -FPToSI converts a floating point to SInt hfd1wil -UIToFP converts a UInt to floating point hfd1wil -SIToFP converts a SInt to floating point hfd1wil -FPTrunc truncates a floating point hfd -FPExt extends a floating point hfd -BitCast performs a bit-preserving type cast hfd1wil -AddrSpaceCast casts a value addrspace -ICmp compares integers 1wil -FCmp compares floating points hfd -PHI is a PHI node instruction -Call calls a function -Select selects an instruction -ExtractValue extracts from aggregate -============= ======================================================================= ================= +============== ======================================================================= ================= +Instruction Action Operand overloads +============== ======================================================================= ================= +Ret returns a value 
(possibly void), from a function. vhfd1wil +Br branches (conditional or unconditional) +Switch performs a multiway switch +Add returns the sum of its two operands wil +FAdd returns the sum of its two operands hfd +Sub returns the difference of its two operands wil +FSub returns the difference of its two operands hfd +Mul returns the product of its two operands wil +FMul returns the product of its two operands hfd +UDiv returns the quotient of its two unsigned operands wil +SDiv returns the quotient of its two signed operands wil +FDiv returns the quotient of its two operands hfd +URem returns the remainder from the unsigned division of its two operands wil +SRem returns the remainder from the signed division of its two operands wil +FRem returns the remainder from the division of its two operands hfd +Shl shifts left (logical) wil +LShr shifts right (logical), with zero bit fill wil +AShr shifts right (arithmetic), with 'a' operand sign bit fill wil +And returns a bitwise logical and of its two operands 1wil +Or returns a bitwise logical or of its two operands 1wil +Xor returns a bitwise logical xor of its two operands 1wil +Alloca allocates memory on the stack frame of the currently executing function +Load reads from memory +Store writes to memory +GetElementPtr gets the address of a subelement of an aggregate value +AtomicCmpXchg atomically modifies memory +AtomicRMW atomically modifies memory +Trunc truncates an integer 1wil +ZExt zero extends an integer 1wil +SExt sign extends an integer 1wil +FPToUI converts a floating point to UInt hfd1wil +FPToSI converts a floating point to SInt hfd1wil +UIToFP converts a UInt to floating point hfd1wil +SIToFP converts a SInt to floating point hfd1wil +FPTrunc truncates a floating point hfd +FPExt extends a floating point hfd +BitCast performs a bit-preserving type cast hfd1wil +AddrSpaceCast casts a value addrspace +ICmp compares integers 1wil +FCmp compares floating points hfd +PHI is a PHI node instruction +Call calls a function +Select selects an instruction +ExtractElement extracts from vector +InsertElement inserts into vector +ShuffleVector Shuffle two vectors +ExtractValue extracts from aggregate +============== ======================================================================= ================= FAdd @@ -2369,6 +2372,53 @@ ID Name Description 255 SampleCmpBias samples a texture after applying the input bias to the mipmap level and compares a single component against the specified comparison value 256 StartVertexLocation returns the BaseVertexLocation from DrawIndexedInstanced or StartVertexLocation from DrawInstanced 257 StartInstanceLocation returns the StartInstanceLocation from Draw*Instanced +258 AllocateRayQuery2 allocates space for RayQuery and return handle +259 ReservedA0 reserved +260 ReservedA1 reserved +261 ReservedA2 reserved +262 HitObject_TraceRay Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject +263 HitObject_FromRayQuery Creates a new HitObject representing a committed hit from a RayQuery +264 HitObject_FromRayQueryWithAttrs Creates a new HitObject representing a committed hit from a RayQuery and committed attributes +265 HitObject_MakeMiss Creates a new HitObject representing a miss +266 HitObject_MakeNop Creates an empty nop HitObject +267 HitObject_Invoke Represents the invocation of the CH/MS shader represented by the HitObject +268 MaybeReorderThread Reorders the current thread +269 HitObject_IsMiss Returns `true` if the HitObject represents a miss +270 
HitObject_IsHit Returns `true` if the HitObject is a NOP-HitObject +271 HitObject_IsNop Returns `true` if the HitObject represents a nop +272 HitObject_RayFlags Returns the ray flags set in the HitObject +273 HitObject_RayTMin Returns the TMin value set in the HitObject +274 HitObject_RayTCurrent Returns the current T value set in the HitObject +275 HitObject_WorldRayOrigin Returns the ray origin in world space +276 HitObject_WorldRayDirection Returns the ray direction in world space +277 HitObject_ObjectRayOrigin Returns the ray origin in object space +278 HitObject_ObjectRayDirection Returns the ray direction in object space +279 HitObject_ObjectToWorld3x4 Returns the object to world space transformation matrix in 3x4 form +280 HitObject_WorldToObject3x4 Returns the world to object space transformation matrix in 3x4 form +281 HitObject_GeometryIndex Returns the geometry index committed on hit +282 HitObject_InstanceIndex Returns the instance index committed on hit +283 HitObject_InstanceID Returns the instance id committed on hit +284 HitObject_PrimitiveIndex Returns the primitive index committed on hit +285 HitObject_HitKind Returns the HitKind of the hit +286 HitObject_ShaderTableIndex Returns the shader table index set for this HitObject +287 HitObject_SetShaderTableIndex Returns a HitObject with updated shader table index +288 HitObject_LoadLocalRootTableConstant Returns the root table constant for this HitObject and offset +289 HitObject_Attributes Returns the attributes set for this HitObject +290 ReservedB28 reserved +291 ReservedB29 reserved +292 ReservedB30 reserved +293 ReservedC0 reserved +294 ReservedC1 reserved +295 ReservedC2 reserved +296 ReservedC3 reserved +297 ReservedC4 reserved +298 ReservedC5 reserved +299 ReservedC6 reserved +300 ReservedC7 reserved +301 ReservedC8 reserved +302 ReservedC9 reserved +303 RawBufferVectorLoad reads from a raw buffer and structured buffer +304 RawBufferVectorStore writes to a RWByteAddressBuffer or RWStructuredBuffer === ===================================================== ======================================================================================================================================================================================================================= @@ -3015,277 +3065,287 @@ The set of validation rules that are known to hold for a DXIL program is identif .. hctdb_instrhelp.get_valrules_rst() .. 
VALRULES-RST:BEGIN -========================================= ======================================================================================================================================================================================================================================================================================================== -Rule Code Description -========================================= ======================================================================================================================================================================================================================================================================================================== -BITCODE.VALID Module must be bitcode-valid -CONTAINER.PARTINVALID DXIL Container must not contain unknown parts -CONTAINER.PARTMATCHES DXIL Container Parts must match Module -CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module -CONTAINER.PARTREPEATED DXIL Container must have only one of each part type -CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader -DECL.ATTRSTRUCT Attributes parameter must be struct type -DECL.DXILFNEXTERN External function must be a DXIL function -DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types -DECL.EXTRAARGS Extra arguments not allowed for shader functions -DECL.FNATTRIBUTE Functions should only contain known function attributes -DECL.FNFLATTENPARAM Function parameters must not use struct types -DECL.FNISCALLED Functions can only be used by call instructions -DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record -DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type -DECL.NOTUSEDEXTERNAL External declaration should not be used -DECL.PARAMSTRUCT Callable function parameter must be struct type -DECL.PAYLOADSTRUCT Payload parameter must be struct type -DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures -DECL.RESOURCEINFNSIG Resources not allowed in function signatures -DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types -DECL.SHADERRETURNVOID Shader functions must return void -DECL.USEDEXTERNALFUNCTION External function must be used -DECL.USEDINTERNAL Internal declaration must be used -FLOW.DEADLOOP Loop must have break. -FLOW.FUNCTIONCALL Function with parameter is not permitted -FLOW.NORECURSION Recursion is not permitted. -FLOW.REDUCIBLE Execution flow must be reducible. -INSTR.ALLOWED Instructions must be of an allowed type. -INSTR.ATOMICCONST Constant destination to atomic. -INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. -INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. -INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. -INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' -INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). -INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. -INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. 
-INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant -INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. -INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. -INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. -INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature -INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed -INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. -INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. -INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. -INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer don't need 2 coordinates. -INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer require 2 coordinates. -INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. -INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. -INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. -INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. -INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. -INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. -INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. -INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. -INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. -INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. -INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. -INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. -INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. -INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. -INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. -INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. -INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. -INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. -INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. -INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. -INSTR.NOIDIVBYZERO No signed integer division by zero. -INSTR.NOINDEFINITEACOS No indefinite arccosine. -INSTR.NOINDEFINITEASIN No indefinite arcsine. -INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. -INSTR.NOINDEFINITELOG No indefinite logarithm. -INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. -INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. -INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. -INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. -INSTR.NOUDIVBYZERO No unsigned integer division by zero. -INSTR.OFFSETONUAVLOAD uav load don't support offset. -INSTR.OLOAD DXIL intrinsic overload must be valid. -INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. 
-INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. -INSTR.OPCONST DXIL intrinsic requires an immediate constant operand -INSTR.OPCONSTRANGE Constant values must be in-range for operation. -INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range -INSTR.PTRBITCAST Pointer type bitcast must be have same size. -INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. -INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. -INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. -INSTR.RESOURCECOORDINATEMISS coord uninitialized. -INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. -INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. -INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. -INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. -INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. -INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. -INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. -INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. -INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. -INSTR.RESOURCEOFFSETMISS offset uninitialized. -INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. -INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. -INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. -INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. -INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. -INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. -INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. -INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. -INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. -INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. -INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. -INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. -INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. -INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. -INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. -INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. -META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. -META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. 
-META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. -META.BRANCHFLATTEN Can't use branch and flatten attributes together. -META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components -META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. -META.COMPUTEWITHNODE Compute entry must not have node metadata -META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. -META.DENSERESIDS Resource identifiers must be zero-based and dense. -META.DUPLICATESYSVALUE System value may only appear once in signature -META.ENTRYFUNCTION entrypoint not found. -META.FLAGSUSAGE Flags must match usage. -META.FORCECASEONSWITCH Attribute forcecase only works for switch. -META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. -META.INTEGERINTERPMODE Interpolation mode on integer must be Constant -META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. -META.INTERPMODEVALID Interpolation mode must be valid -META.INVALIDCONTROLFLOWHINT Invalid control flow hint. -META.KNOWN Named metadata should be known -META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. -META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. -META.NOSEMANTICOVERLAP Semantics must not overlap -META.REQUIRED Required metadata missing. -META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. -META.SEMAKINDVALID Semantic kind must be valid -META.SEMANTICCOMPTYPE %0 must be %1. -META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index -META.SEMANTICLEN Semantic length must be at least 1 and at most 64. -META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location -META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 -META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. -META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. -META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value -META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together -META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size -META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. -META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned -META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds -META.SYSTEMVALUEROWS System value may only have 1 row -META.TARGET Target triple must be 'dxil-ms-dx' -META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. -META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. -META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. -META.USED All metadata must be used by dxil. -META.VALIDSAMPLERMODE Invalid sampler mode on sampler . -META.VALUERANGE Metadata value must be within range. -META.VERSIONSUPPORTED Version in metadata must be supported. -META.WELLFORMED Metadata must be well-formed in operand count and types. 
-SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. -SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. -SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes -SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow -SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap -SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes -SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. -SM.COMPLETEPOSITION Not all elements of SV_Position were written. -SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. -SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. -SM.CSNOSIGNATURES Compute shaders must not have shader signatures. -SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. -SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. -SM.DXILVERSION Target shader model requires specific Dxil Version -SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. -SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. -SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. -SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. -SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. -SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. -SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count -SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry -SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above -SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode -SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties -SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group -SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model -SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage -SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible -SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. -SM.INVALIDRESOURCEKIND Invalid resources kind. -SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. -SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. -SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. 
Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. -SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. -SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. -SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. -SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. -SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. -SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. -SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. -SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. -SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists -SM.NAME Target shader model name must be known -SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. -SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. -SM.OPCODE Opcode must be defined in target shader model -SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function -SM.OPERAND Operand must be defined in target shader model. -SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. -SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . -SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. -SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). -SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. -SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. -SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. -SM.PSTARGETCOL0 SV_Target packed location must start at column 0. -SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. -SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. -SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. -SM.RESOURCERANGEOVERLAP Resource ranges must not overlap -SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. -SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. -SM.SEMANTIC Semantic must be defined in target shader model -SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. -SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. 
-SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. -SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. -SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. -SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. -SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. -SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature -SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 -SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. -SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max -SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min -SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. -SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. -SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders -SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range -SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. -SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. -SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. -SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] -SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. -TYPES.DEFINED Type must be defined based on DXIL primitives -TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. -TYPES.INTWIDTH Int type must be of valid width -TYPES.NOMULTIDIM Only one dimension allowed for array type. -TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. 
-TYPES.NOVECTOR Vector types must not be present -========================================= ======================================================================================================================================================================================================================================================================================================== +===================================================== ======================================================================================================================================================================================================================================================================================================== +Rule Code Description +===================================================== ======================================================================================================================================================================================================================================================================================================== +BITCODE.VALID Module must be bitcode-valid +CONTAINER.CONTENTINVALID DXIL Container Content is well-formed +CONTAINER.CONTENTMATCHES DXIL Container Content must match Module +CONTAINER.PARTINVALID DXIL Container must not contain unknown parts +CONTAINER.PARTMATCHES DXIL Container Parts must match Module +CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module +CONTAINER.PARTREPEATED DXIL Container must have only one of each part type +CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader +CONTAINER.UNUSEDITEMINTABLE Items in Table must be used +DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant +DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant +DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument +DECL.ATTRSTRUCT Attributes parameter must be struct type +DECL.DXILFNEXTERN External function must be a DXIL function +DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types +DECL.EXTRAARGS Extra arguments not allowed for shader functions +DECL.FNATTRIBUTE Functions should only contain known function attributes +DECL.FNFLATTENPARAM Function parameters must not use struct types +DECL.FNISCALLED Functions can only be used by call instructions +DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record +DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type +DECL.NOTUSEDEXTERNAL External declaration should not be used +DECL.PARAMSTRUCT Callable function parameter must be struct type +DECL.PAYLOADSTRUCT Payload parameter must be struct type +DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures +DECL.RESOURCEINFNSIG Resources not allowed in function signatures +DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types +DECL.SHADERRETURNVOID Shader functions must return void +DECL.USEDEXTERNALFUNCTION External function must be used +DECL.USEDINTERNAL Internal declaration must be used +FLOW.DEADLOOP Loop must have break. +FLOW.FUNCTIONCALL Function with parameter is not permitted +FLOW.NORECURSION Recursion is not permitted. 
+FLOW.REDUCIBLE Execution flow must be reducible. +INSTR.ALLOWED Instructions must be of an allowed type. +INSTR.ATOMICCONST Constant destination to atomic. +INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. +INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. +INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. +INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' +INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). +INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. +INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. +INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant +INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. +INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. +INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. +INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature +INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed +INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. +INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. +INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. +INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. +INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. +INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. +INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. +INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. +INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. +INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. +INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. +INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. +INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. +INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. +INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. +INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. +INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. +INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. +INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. +INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. +INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. +INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. +INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. 
+INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. +INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. +INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. +INSTR.NOIDIVBYZERO No signed integer division by zero. +INSTR.NOINDEFINITEACOS No indefinite arccosine. +INSTR.NOINDEFINITEASIN No indefinite arcsine. +INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. +INSTR.NOINDEFINITELOG No indefinite logarithm. +INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. +INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. +INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. +INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. +INSTR.NOUDIVBYZERO No unsigned integer division by zero. +INSTR.OFFSETONUAVLOAD uav load don't support offset. +INSTR.OLOAD DXIL intrinsic overload must be valid. +INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. +INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. +INSTR.OPCONST DXIL intrinsic requires an immediate constant operand +INSTR.OPCONSTRANGE Constant values must be in-range for operation. +INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range +INSTR.PTRBITCAST Pointer type bitcast must be have same size. +INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. +INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. +INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. +INSTR.RESOURCECOORDINATEMISS coord uninitialized. +INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. +INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. +INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. +INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. +INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. +INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. +INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. +INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. +INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. +INSTR.RESOURCEOFFSETMISS offset uninitialized. +INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. +INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. +INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. +INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. +INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. 
+INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. +INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. +INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. +INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. +INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. +INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. +INSTR.UNDEFHITOBJECT HitObject is undef. +INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. +INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. +INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. +INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. +INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. +META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. +META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. +META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. +META.BRANCHFLATTEN Can't use branch and flatten attributes together. +META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components +META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. +META.COMPUTEWITHNODE Compute entry must not have node metadata +META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. +META.DENSERESIDS Resource identifiers must be zero-based and dense. +META.DUPLICATESYSVALUE System value may only appear once in signature +META.ENTRYFUNCTION entrypoint not found. +META.FLAGSUSAGE Flags must match usage. +META.FORCECASEONSWITCH Attribute forcecase only works for switch. +META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. +META.INTEGERINTERPMODE Interpolation mode on integer must be Constant +META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. +META.INTERPMODEVALID Interpolation mode must be valid +META.INVALIDCONTROLFLOWHINT Invalid control flow hint. +META.KNOWN Named metadata should be known +META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. +META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. +META.NOSEMANTICOVERLAP Semantics must not overlap +META.REQUIRED Required metadata missing. +META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. +META.SEMAKINDVALID Semantic kind must be valid +META.SEMANTICCOMPTYPE %0 must be %1. +META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index +META.SEMANTICLEN Semantic length must be at least 1 and at most 64. +META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location +META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 +META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. +META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. 
+META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value +META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together +META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size +META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. +META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned +META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds +META.SYSTEMVALUEROWS System value may only have 1 row +META.TARGET Target triple must be 'dxil-ms-dx' +META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. +META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. +META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. +META.USED All metadata must be used by dxil. +META.VALIDSAMPLERMODE Invalid sampler mode on sampler . +META.VALUERANGE Metadata value must be within range. +META.VERSIONSUPPORTED Version in metadata must be supported. +META.WELLFORMED Metadata must be well-formed in operand count and types. +SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. +SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. +SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes +SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow +SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap +SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes +SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. +SM.COMPLETEPOSITION Not all elements of SV_Position were written. +SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. +SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. +SM.CSNOSIGNATURES Compute shaders must not have shader signatures. +SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. +SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. +SM.DXILVERSION Target shader model requires specific Dxil Version +SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. +SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. +SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. +SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. +SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. +SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. 
+SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count +SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry +SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above +SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode +SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties +SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group +SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model +SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage +SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible +SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. +SM.INVALIDRESOURCEKIND Invalid resources kind. +SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. +SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. +SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. +SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. +SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. +SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. +SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. +SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. +SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. +SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. +SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. +SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists +SM.NAME Target shader model name must be known +SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. +SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. +SM.OPCODE Opcode must be defined in target shader model +SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function +SM.OPERAND Operand must be defined in target shader model. +SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. +SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . +SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. 
+SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version +SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). +SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. +SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. +SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. +SM.PSTARGETCOL0 SV_Target packed location must start at column 0. +SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. +SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. +SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. +SM.RESOURCERANGEOVERLAP Resource ranges must not overlap +SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. +SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. +SM.SEMANTIC Semantic must be defined in target shader model +SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. +SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. +SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. +SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. +SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. +SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. +SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. +SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature +SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 +SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. +SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max +SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min +SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. +SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. +SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders +SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range +SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. +SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. +SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. +SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] +SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. +TYPES.DEFINED Type must be defined based on DXIL primitives +TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. 
+TYPES.INTWIDTH Int type must be of valid width +TYPES.NOMULTIDIM Only one dimension allowed for array type. +TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. +TYPES.NOVECTOR Vector types must not be present +===================================================== ======================================================================================================================================================================================================================================================================================================== .. VALRULES-RST:END From 10bff1319a28e8ad2aa0c5aa894ba8ec4c3a2e2b Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 16 Apr 2025 05:35:25 +0200 Subject: [PATCH 86/88] Fix field names in long vector DICompositeType (#7332) Fix OOB accesses for debug info vector field names for vectors of length >4. --- tools/clang/lib/CodeGen/CGDebugInfo.cpp | 11 ++++++- .../hlsl/types/longvec-field-di.hlsl | 33 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl diff --git a/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 206f7d9523..d947887d62 100644 --- a/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1047,8 +1047,17 @@ bool CGDebugInfo::TryCollectHLSLRecordElements(const RecordType *Ty, unsigned VecSize = hlsl::GetHLSLVecSize(QualTy); unsigned ElemSizeInBits = CGM.getContext().getTypeSize(ElemQualTy); unsigned CurrentAlignedOffset = 0; + SmallString<8> FieldNameBuf; for (unsigned ElemIdx = 0; ElemIdx < VecSize; ++ElemIdx) { - StringRef FieldName = StringRef(&"xyzw"[ElemIdx], 1); + StringRef FieldName; + if (VecSize <= 4) { + FieldName = StringRef(&"xyzw"[ElemIdx], 1); + } else { + FieldNameBuf.clear(); + llvm::raw_svector_ostream OS(FieldNameBuf); + OS << 'c' << ElemIdx; + FieldName = OS.str(); + } CurrentAlignedOffset = llvm::RoundUpToAlignment(CurrentAlignedOffset, AlignBits); llvm::DIType *FieldType = diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl new file mode 100644 index 0000000000..935ec3cc13 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=8 | FileCheck %s --check-prefix=CHECK-LONG +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=4 | FileCheck %s --check-prefix=CHECK-SHORT + +// Test debug info for short and long vector types + +RWByteAddressBuffer buf; + +export vector lv_global_arr_ret() { + vector d = buf.Load >(0); + return d; +} + +// CHECK-LONG: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 256, align: 32, elements: ![[ELEMDI:[^ ]+]], +// CHECK-LONG: ![[ELEMDI]] = !{![[C0:[^ ]+]], ![[C1:[^ ]+]], ![[C2:[^ ]+]], ![[C3:[^ ]+]], ![[C4:[^ ]+]], ![[C5:[^ ]+]], ![[C6:[^ ]+]], ![[C7:[^ ]+]]} +// CHECK-LONG: ![[C0]] = !DIDerivedType(tag: DW_TAG_member, name: "c0", scope: !{{[^ ]+}} file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +// CHECK-LONG: ![[C1]] = !DIDerivedType(tag: DW_TAG_member, name: "c1", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[C2]] = 
!DIDerivedType(tag: DW_TAG_member, name: "c2", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic)
+// CHECK-LONG: ![[C3]] = !DIDerivedType(tag: DW_TAG_member, name: "c3", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic)
+// CHECK-LONG: ![[C4]] = !DIDerivedType(tag: DW_TAG_member, name: "c4", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 128, flags: DIFlagPublic)
+// CHECK-LONG: ![[C5]] = !DIDerivedType(tag: DW_TAG_member, name: "c5", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 160, flags: DIFlagPublic)
+// CHECK-LONG: ![[C6]] = !DIDerivedType(tag: DW_TAG_member, name: "c6", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 192, flags: DIFlagPublic)
+// CHECK-LONG: ![[C7]] = !DIDerivedType(tag: DW_TAG_member, name: "c7", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 224, flags: DIFlagPublic)
+// CHECK-LONG: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]])
+
+// CHECK-SHORT: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 128, align: 32, elements: ![[ELEMDI:[^ ]+]],
+// CHECK-SHORT: ![[ELEMDI]] = !{![[X:[^ ]+]], ![[Y:[^ ]+]], ![[Z:[^ ]+]], ![[W:[^ ]+]]}
+// CHECK-SHORT: ![[X]] = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic)
+// CHECK-SHORT: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+// CHECK-SHORT: ![[Y]] = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+// CHECK-SHORT: ![[Z]] = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic)
+// CHECK-SHORT: ![[W]] = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic)
+// CHECK-SHORT: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]])
\ No newline at end of file

From 0a470b51535265b759c0c3a3078fa8f97fd5eb12 Mon Sep 17 00:00:00 2001
From: Steve Urquhart <53908460+SteveUrquhart@users.noreply.github.com>
Date: Wed, 16 Apr 2025 11:00:58 -0400
Subject: [PATCH 87/88] [SPIRV] Remove patch decoration from gl_TessCoord (#7187) (#7349)

This PR fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7187.
gl_TessCoord is not a per-patch builtin and therefore the SPIRV should not be decorated with Patch.
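As an illustration (a minimal sketch, not part of this patch; the struct and parameter names are invented), consider a domain shader of the shape exercised by the tests below: the SV_DomainLocation input lowers to the TessCoord builtin and varies per invocation, while the SV_TessFactor and SV_InsideTessFactor inputs are the ones that legitimately keep the Patch decoration.

struct HsConstants {
  float Edges[3] : SV_TessFactor;       // per-patch input, still decorated Patch
  float Inside   : SV_InsideTessFactor; // per-patch input, still decorated Patch
};

struct ControlPoint {
  float3 pos : POSITION;
};

[domain("tri")]
float4 main(HsConstants pc,
            float3 uvw : SV_DomainLocation, // maps to gl_TessCoord, no Patch decoration
            const OutputPatch<ControlPoint, 3> patch) : SV_Position {
  // Barycentric interpolation of the three control points at this domain location.
  float3 p = patch[0].pos * uvw.x + patch[1].pos * uvw.y + patch[2].pos * uvw.z;
  return float4(p, 1.0);
}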
This is clear in the GLSL specification, and a SPIRV spec clarification is online here:
https://gitlab.khronos.org/spirv/SPIR-V/-/issues/819
---
 tools/clang/lib/SPIRV/DeclResultIdMapper.cpp                   | 3 ++-
 tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv           | 1 -
 tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl | 1 -
 tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl          | 1 -
 4 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
index 0358873589..de73d5e417 100644
--- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
+++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
@@ -3522,7 +3522,8 @@ SpirvVariable *DeclResultIdMapper::createSpirvInterfaceVariable(
       // Decorate with PerPrimitiveNV for per-primitive out variables.
       spvBuilder.decoratePerPrimitiveNV(varInstr, varInstr->getSourceLocation());
-    } else {
+    } else if (stageVar.getSemanticInfo().getKind() !=
+               hlsl::Semantic::Kind::DomainLocation) {
       spvBuilder.decoratePatch(varInstr, varInstr->getSourceLocation());
     }
   }
diff --git a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv
index 3b0c060a0d..9d915a84f2 100644
--- a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv
+++ b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv
@@ -96,7 +96,6 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // CHECK-NEXT: OpDecorate %in_var_TANVCORNER Patch
 // CHECK-NEXT: OpDecorate %in_var_TANWEIGHTS Patch
 // CHECK-NEXT: OpDecorate %gl_TessCoord BuiltIn TessCoord
-// CHECK-NEXT: OpDecorate %gl_TessCoord Patch
 // CHECK-NEXT: OpDecorate %gl_Position BuiltIn Position
 // CHECK-NEXT: OpDecorate %in_var_BEZIERPOS Location 0
 // CHECK-NEXT: OpDecorate %in_var_TANGENT Location 1
diff --git a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl
index 5e4049f8c3..391e09a428 100644
--- a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl
@@ -4,7 +4,6 @@
 // CHECK-SAME: %gl_TessCoord
 // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord
-// CHECK: OpDecorate %gl_TessCoord Patch
 // CHECK: %gl_TessCoord = OpVariable %_ptr_Input_v3float Input
diff --git a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl
index a8fe81e021..6f073aeb46 100644
--- a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl
@@ -85,7 +85,6 @@ struct DsOut {
 // CHECK: OpDecorateString %gl_PointSize UserSemantic "PSIZE"
 // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord
 // CHECK: OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation"
-// CHECK: OpDecorate %gl_TessCoord Patch
 // CHECK: OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter
 // CHECK: OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor"
 // CHECK: OpDecorate %gl_TessLevelOuter Patch

From 0beaa767f7712f0ed0ab72e1e17cb94c25f84c34 Mon Sep 17 00:00:00 2001
From: Simon Moll
Date: Thu, 17 Apr 2025 18:20:49 +0200
Subject: [PATCH 88/88] [SER] MaybeReorderThread + Make(Nop|Miss) HLSL -> DXIL lowering and tests (#7262)

- HLSL -> DXIL lowering
- ast, hlsl->dxil, dxilgen, and ScalarReplAggregatesHLSL tests

SER implementation tracker (#7214)
---
 include/dxc/HLSL/HLOperations.h | 4 +
 lib/HLSL/HLOperationLower.cpp | 116 ++++++++++++--
 .../Scalar/ScalarReplAggregatesHLSL.cpp | 8 +
tools/clang/lib/Sema/SemaHLSL.cpp | 1 + .../hlsl/intrinsics/maybereorder.hlsl | 37 +++++ .../objects/HitObject/hitobject_make.hlsl | 75 +++++++++ .../hlsl/objects/HitObject/lit.local.cfg | 1 + .../DXC/Passes/DxilGen/hitobject_dxilgen.ll | 5 +- .../Passes/DxilGen/maybereorder_dxilgen.ll | 3 +- .../hitobject_make_scalarrepl.ll | 142 ++++++++++++++++++ .../objects/HitObject/hitobject_make.hlsl | 12 -- .../objects/HitObject/hitobject_make_ast.hlsl | 24 --- .../hlsl/objects/HitObject/maybereorder.hlsl | 13 -- .../objects/HitObject/maybereorder_ast.hlsl | 28 ---- 14 files changed, 376 insertions(+), 93 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl delete mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index f87d324baf..a7db8612a6 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -433,6 +433,10 @@ const unsigned kNodeHandleToResCastOpIdx = 1; const unsigned kAnnotateNodeHandleNodePropIdx = 2; const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; +// HitObject::MakeMiss +const unsigned kHitObjectMakeMiss_NumOp = 8; +const unsigned kHitObjectMakeMissRayDescOpIdx = 4; + } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index b5114fa34b..be45021e41 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -12,6 +12,7 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilConstants.h" #define _USE_MATH_DEFINES #include #include @@ -6183,19 +6184,114 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // Shader Execution Reordering. namespace { -Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, +Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *HlslOP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + if (Opcode == OP::OpCode::HitObject_MakeNop) { + Value *HitObject = TrivialDxilOperation( + Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); + Builder.CreateStore(HitObject, HitObjectPtr); + DXASSERT( + CI->use_empty(), + "Default ctor return type is a Clang artifact. 
Value must not be used"); + return nullptr; + } + + DXASSERT_NOMSG(CI->getNumArgOperands() == + HLOperandIndex::kHitObjectMakeMiss_NumOp); + Value *RayFlags = CI->getArgOperand(SrcIdx++); + Value *MissShaderIdx = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMissRayDescOpIdx); + Value *RayDescOrigin = CI->getArgOperand(SrcIdx++); + Value *RayDescOriginX = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)0); + Value *RayDescOriginY = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)1); + Value *RayDescOriginZ = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)2); + + Value *RayDescTMin = CI->getArgOperand(SrcIdx++); + Value *RayDescDirection = CI->getArgOperand(SrcIdx++); + Value *RayDescDirectionX = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)0); + Value *RayDescDirectionY = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)1); + Value *RayDescDirectionZ = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)2); + + Value *RayDescTMax = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + + Value *OutHitObject = TrivialDxilOperation( + Opcode, + {nullptr, RayFlags, MissShaderIdx, RayDescOriginX, RayDescOriginY, + RayDescOriginZ, RayDescTMin, RayDescDirectionX, RayDescDirectionY, + RayDescDirectionZ, RayDescTMax}, + Helper.voidTy, CI, HlslOP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, - OP::OpCode opcode, - HLOperationLowerHelper &helper, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return nullptr; // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + + // clang-format off + // Match MaybeReorderThread overload variants: + // void MaybeReorderThread(, + // HitObject Hit); + // void MaybeReorderThread(, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB ); + // void MaybeReorderThread(, + // HitObject Hit, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB); + // clang-format on + const unsigned NumHLArgs = CI->getNumArgOperands(); + DXASSERT_NOMSG(NumHLArgs >= 2); + + // Use a NOP HitObject for MaybeReorderThread without HitObject. 
+ Value *HitObject = nullptr; + unsigned HLIndex = 1; + if (3 == NumHLArgs) { + HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr}, + Type::getVoidTy(CI->getContext()), CI, OP); + } else { + Value *FirstParam = CI->getArgOperand(HLIndex); + DXASSERT_NOMSG(isa(FirstParam->getType())); + IRBuilder<> Builder(CI); + HitObject = Builder.CreateLoad(FirstParam); + HLIndex++; + } + + // If there are trailing parameters, these have to be the two coherence bit + // parameters + Value *CoherenceHint = nullptr; + Value *NumCoherenceHintBits = nullptr; + if (2 != NumHLArgs) { + DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs); + CoherenceHint = CI->getArgOperand(HLIndex++); + NumCoherenceHintBits = CI->getArgOperand(HLIndex++); + DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType()); + DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType()); + } else { + CoherenceHint = UndefValue::get(Helper.i32Ty); + NumCoherenceHintBits = OP->GetU32Const(0); + } + + TrivialDxilOperation( + OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits}, + Type::getVoidTy(CI->getContext()), CI, OP); + return nullptr; } Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, @@ -6968,11 +7064,9 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, - DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil - // opcode + DXIL::OpCode::HitObject_MakeNop}, {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, - DXIL::OpCode::NumOpCodes_Dxil_1_8}, // FIXME: Just a placeholder Dxil - // opcode + DXIL::OpCode::MaybeReorderThread}, {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index ec17fce9c8..e487079b94 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2775,6 +2775,14 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, /*bIn*/ true, /*bOut*/ true); } break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: { + if (OldVal == + CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + } + } break; case IntrinsicOp::MOP_TraceRayInline: { if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 230c7e65d9..418425a468 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -12066,6 +12066,7 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, case hlsl::IntrinsicOp::MOP_TraceRayInline: DiagnoseTraceRayInline(*this, CE); break; + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeMiss: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); break; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl new file mode 100644 index 0000000000..08836dfbaf --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl @@ 
-0,0 +1,37 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %[[NOP:[^ ]+]]) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %[[NOP]], i32 241, i32 3) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7) + +// DXIL: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP:[^ ]+]], i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 242, i32 7) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl new file mode 100644 index 0000000000..1e947b2296 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl @@ -0,0 +1,75 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition +// AST-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final +// AST-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' +// AST-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' +// AST-NEXT: | | | |-HLSLIntrinsicAttr 
{{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeMiss +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMissShaderIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRay +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeMiss 'TResult (TRayFlags, TMissShaderIndex, TRay) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'TRayFlags' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'TMissShaderIndex' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Ray 'TRay' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeMiss 'dx::HitObject (unsigned int, unsigned int, RayDesc)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MakeMiss 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 387 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: %{{[^ ]+}} = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 1, %struct.RayDesc* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 2, %struct.RayDesc* %{{[^ ]+}}) + +// Expect HitObject_Make* calls with identical parameters to be folded. 
+// DXIL: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NOT: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) +// DXIL-NOT: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1 +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 2, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + +void Use(in dx::HitObject hit) { + dx::MaybeReorderThread(hit); +} + +[shader("raygeneration")] +void main() { + dx::HitObject nop; + Use(nop); + + dx::HitObject nop2 = dx::HitObject::MakeNop(); + Use(nop2); + + RayDesc ray = {{0,0,0}, {0,0,1}, 0.05, 1000.0}; + dx::HitObject miss = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss); + + dx::HitObject miss2 = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss2); + + dx::HitObject miss3 = dx::HitObject::MakeMiss(0, 2, ray); + Use(miss3); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg new file mode 100644 index 0000000000..ba86568f9a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg @@ -0,0 +1 @@ +config.unsupported = 'dxil-1-9' not in config.available_features diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll index 01dafe5e86..17a968675f 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll @@ -1,9 +1,6 @@ ; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s ; REQUIRES: dxil-1-9 -; CHECK-NOT: @dx.op.hitObject_ -; CHECK-NOT: @dx.op.maybeReorderThread - ; ; Buffer Definitions: ; @@ -37,9 +34,11 @@ entry: %tmp = alloca %dx.types.HitObject, align 4 %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !24 ; line:10 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !24 ; line:10 col:3 %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !24 ; line:10 col:3 diff --git a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll index f5130bca3f..ca25b1e115 100644 --- 
a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll @@ -1,8 +1,6 @@ ; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s ; REQUIRES: dxil-1-9 -; CHECK-NOT: @dx.op.hitObject_ -; CHECK-NOT: @dx.op.maybeReorderThread ; ; Buffer Definitions: @@ -36,6 +34,7 @@ entry: %hit = alloca %dx.types.HitObject, align 4 %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %hit), !dbg !24 ; line:10 col:3 call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %hit, i32 241, i32 3), !dbg !25 ; line:11 col:3 diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll new file mode 100644 index 0000000000..89ee886c2e --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll @@ -0,0 +1,142 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %ray = alloca %struct.RayDesc, align 4 +; CHECK-NOT: %{{[^ ]+}} = alloca %struct.RayDesc + %tmp2 = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT0:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT1:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT2:[^ ]+]] = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !23 ; line:42 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !23 ; line:42 col:3 +; CHECK: %[[THIS0:[^ ]+]] = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT0]]) +; CHECK-NOT: %[[THIS0]] + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !27 ; line:42 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !28 ; line:43 col:3 +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT1]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !28 ; line:43 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 
col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !28 ; line:43 col:3 + %4 = bitcast %struct.RayDesc* %ray to i8*, !dbg !29 ; line:44 col:3 + call void @llvm.lifetime.start(i64 32, i8* %4) #0, !dbg !29 ; line:44 col:3 + %5 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 0, !dbg !30 ; line:44 col:17 + store <3 x float> zeroinitializer, <3 x float>* %5, !dbg !30 ; line:44 col:17 + %6 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 1, !dbg !30 ; line:44 col:17 + store float 0.000000e+00, float* %6, !dbg !30 ; line:44 col:17 + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 2, !dbg !30 ; line:44 col:17 + store <3 x float> , <3 x float>* %7, !dbg !30 ; line:44 col:17 + %8 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 3, !dbg !30 ; line:44 col:17 + store float 1.000000e+03, float* %8, !dbg !30 ; line:44 col:17 + %9 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.start(i64 4, i8* %9) #0, !dbg !31 ; line:45 col:3 +; CHECK: store <3 x float> zeroinitializer, <3 x float>* %[[pRDO:[^ ]+]], +; CHECK: store float 0.000000e+00, float* %[[pRDTMIN:[^ ]+]], +; CHECK: store <3 x float> , <3 x float>* %[[pRDD:[^ ]+]], +; CHECK: store float 1.000000e+03, float* %[[pRDTMAX:[^ ]+]], +; CHECK-DAG: %[[RDO:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDO]], +; CHECK-DAG: %[[RDTMIN:[^ ]+]] = load float, float* %[[pRDTMIN]], +; CHECK-DAG: %[[RDD:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD]], +; CHECK-DAG: %[[RDTMAX:[^ ]+]] = load float, float* %[[pRDTMAX]], +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO]], float %[[RDTMIN]], <3 x float> %[[RDD]], float %[[RDTMAX]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %tmp2, i32 0, i32 1, %struct.RayDesc* %ray), !dbg !31 ; line:45 col:3 + %10 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.end(i64 4, i8* %10) #0, !dbg !31 ; line:45 col:3 + %11 = bitcast %struct.RayDesc* %ray to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 32, i8* %11) #0, !dbg !32 ; line:46 col:1 + %12 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 4, i8* %12) #0, !dbg !32 ; line:46 col:1 + ret void, !dbg !32 ; line:46 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !12} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, 
i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.dx::HitObject" undef, !5, %struct.RayDesc undef, !7} +!5 = !{i32 4, !6} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 1, void ()* @"\01?main@@YAXXZ", !13} +!13 = !{!14} +!14 = !{i32 1, !15, !15} +!15 = !{} +!16 = !{null, !"", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!20 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!21 = !{i32 -2147483584} +!22 = !{i32 -1} +!23 = !DILocation(line: 42, column: 3, scope: !24) +!24 = !DISubprogram(name: "main", scope: !25, file: !25, line: 41, type: !26, isLocal: false, isDefinition: true, scopeLine: 41, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!25 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl", directory: "") +!26 = !DISubroutineType(types: !15) +!27 = !DILocation(line: 42, column: 17, scope: !24) +!28 = !DILocation(line: 43, column: 3, scope: !24) +!29 = !DILocation(line: 44, column: 3, scope: !24) +!30 = !DILocation(line: 44, column: 17, scope: !24) +!31 = !DILocation(line: 45, column: 3, scope: !24) +!32 = !DILocation(line: 46, column: 1, scope: !24) \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl deleted file mode 100644 index 4e09b770ec..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s -// REQUIRES: dxil-1-9 - -// TODO: Implement lowering for dx::HitObject::MakeNop - -// CHECK-NOT: call - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::HitObject::MakeNop(); -} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl deleted file mode 100644 index fd2fbc5974..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition -// CHECK-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final -// CHECK-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" -// CHECK-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' -// CHECK-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' -// CHECK-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 -// CHECK-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit - -// CHECK: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop -// CHECK-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult -// CHECK-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static -// CHECK-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static -// CHECK-NEXT: | | | |-TemplateArgument type 'dx::HitObject' -// CHECK-NEXT: | | | 
|-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 -// CHECK-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::HitObject::MakeNop(); -} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl deleted file mode 100644 index 8824cffaec..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s - -// TODO: Implement lowering for dx::MaybeReorderThread - -// CHECK-NOT: call - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::MaybeReorderThread(hit); - dx::MaybeReorderThread(hit, 0xf1, 3); - dx::MaybeReorderThread(0xf2, 7); -} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl deleted file mode 100644 index d570ef021f..0000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder_ast.hlsl +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s - -// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' -// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 -// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - -// CHECK: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' -// CHECK-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' -// CHECK-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 -// CHECK-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - -// CHECK: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern -// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' -// CHECK-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' -// CHECK-NEXT: |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 -// CHECK-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" - - -[shader("raygeneration")] -void main() { - dx::HitObject hit; - dx::MaybeReorderThread(hit); - dx::MaybeReorderThread(hit, 0xf1, 3); - dx::MaybeReorderThread(0xf2, 7); -} \ No newline at end of file