diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 01ad1577b7..6cbdeb6270 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,2 @@ -* @microsoft/hlsl-release +# Uncomment the next line in release branches after ask-mode begins +# * @microsoft/hlsl-release diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index 7e39a5b0be..d1887e4519 100644 --- a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -13,16 +13,23 @@ jobs: pull-requests: write steps: - name: Fetch LLVM sources - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - fetch-depth: 2 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Checkout through merge base + uses: rmacklin/fetch-through-merge-base@bfe4d03a86f9afa52bc1a70e9814fc92a07f7b75 # v0.3.0 + with: + base_ref: ${{ github.event.pull_request.base.ref }} + head_ref: ${{ github.event.pull_request.head.sha }} + deepen_length: 500 - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v41 + uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1 with: separator: "," - fetch_depth: 100 # Fetches only the last 10 commits + skip_initial_fetch: true - name: "Listed files" env: diff --git a/.github/workflows/coverage-gh-pages.yml b/.github/workflows/coverage-gh-pages.yml index 4c7b2c2018..07e63584e3 100644 --- a/.github/workflows/coverage-gh-pages.yml +++ b/.github/workflows/coverage-gh-pages.yml @@ -26,11 +26,11 @@ jobs: timeout-minutes: 240 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: true - name: Setup Pages - uses: actions/configure-pages@v2 + uses: actions/configure-pages@v5 - name: Install dependencies run: sudo apt install -y ninja-build - name: Configure @@ -44,7 +44,7 @@ jobs: - name: Force artifact permissions run: chmod -c -R +rX ${{github.workspace}}/build/report - 
name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v3 with: path: ${{github.workspace}}/build/report @@ -60,4 +60,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f7db99784..5210718005 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,15 +17,6 @@ if(POLICY CMP0022) cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required endif() -if (POLICY CMP0051) - # CMake 3.1 and higher include generator expressions of the form - # $ in the SOURCES property. These need to be - # stripped everywhere that access the SOURCES property, so we just - # defer to the OLD behavior of not including generator expressions - # in the output for now. - cmake_policy(SET CMP0051 OLD) -endif() - if(CMAKE_VERSION VERSION_LESS 3.1.20141117) set(cmake_3_2_USES_TERMINAL) else() @@ -686,6 +677,8 @@ add_subdirectory(include/dxc) # really depend on anything else in the build it is safe. list(APPEND LLVM_COMMON_DEPENDS HCTGen) +add_subdirectory(utils/hct) + if(EXISTS "${LLVM_MAIN_SRC_DIR}/external") add_subdirectory(external) # SPIRV change endif() @@ -769,9 +762,7 @@ if (LLVM_INCLUDE_DOCS) add_subdirectory(docs) endif() -if (LLVM_BUILD_DOCS) - add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change -endif() +add_hlsl_hctgen(DxilDocs OUTPUT docs/DXIL.rst CODE_TAG) # HLSL Change add_subdirectory(cmake/modules) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 233211f150..840b4f0f17 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,10 +40,32 @@ Before submitting a feature or substantial code contribution please discuss it w ### Coding guidelines -The coding, style, and general engineering guidelines follow those described in the docs/CodingStandards.rst. For additional guidelines in code specific to HLSL, see the docs/HLSLChanges.rst file. 
+The coding, style, and general engineering guidelines follow those described in the [LLVM Coding Standards](docs/CodingStandards.rst). For additional guidelines in code specific to HLSL, see the [HLSL Changes](docs/HLSLChanges.rst) docs. DXC has adopted a clang-format requirement for all incoming changes to C and C++ files. PRs to DXC should have the *changed code* clang formatted to the LLVM style, and leave the remaining portions of the file unchanged. This can be done using the `git-clang-format` tool or IDE driven workflows. A GitHub action will run on all PRs to validate that the change is properly formatted. +#### Applying LLVM Standards + +All new code contributed to DXC should follow the LLVM coding standards. + +Note that the LLVM Coding Standards have a golden rule: + +> **If you are extending, enhancing, or bug fixing already implemented code, use the style that is already being used so that the source is uniform and easy to follow.** + +The golden rule should continue to be applied to places where DXC is self-consistent. A good example is DXC's common use of `PascalCase` instead of `camelCase` for APIs in some parts of the HLSL implementation. In any place where DXC is not self-consistent new code should follow the LLVM Coding Standard. + +A good secondary rule to follow is: + +> **When in doubt, follow LLVM.** + +Adopting LLVM's coding standards provides a consistent set of rules and guidelines to hold all contributions to. This allows patch authors to clearly understand the expectations placed on contributions, and allows reviewers to have a bar to measure contributions against. Aligning with LLVM by default ensures the path of least resistance for everyone. + +Since many of the LLVM Coding Standards are not enforced automatically we rely on code reviews to provide feedback and ensure contributions align with the expected coding standards. 
Since we rely on reviewers for enforcement and humans make mistakes, please keep in mind: + +> **Code review is a conversation.** + +It is completely reasonable for a patch author to question feedback and provide additional context about why something was done the way it was. Reviewers often see narrow slices in diffs rather than the full context of a file or part of the compiler, so they may not always provide perfect feedback. This is especially true with the application of the "golden rule" since it depends on understanding a wider context. + ### Documenting Pull Requests Pull request descriptions should have the following format: diff --git a/README.md b/README.md index 35c0132068..ddafde2115 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,16 @@ Development kits containing only the dxc.exe driver app, the dxcompiler.dll, and As an example of community contribution, this project can also target the [SPIR-V](https://www.khronos.org/registry/spir-v/) intermediate representation. Please see the [doc](docs/SPIR-V.rst) for how HLSL features are mapped to SPIR-V, and the [wiki](https://github.com/microsoft/DirectXShaderCompiler/wiki/SPIR%E2%80%90V-CodeGen) page for how to build, use, and contribute to the SPIR-V CodeGen. +### Metal CodeGen + +When built from source DXC can utilize the [Metal Shader +Converter](https://developer.apple.com/metal/shader-converter/) if it is +available during build and configuration time. This allows DXC to generate Metal +shader libraries directly using the `-metal` flag. + +Note: DXC cannot currently disassemble Metal shaders so the `-Fc` flag cannot be +used in conjunction with the `-Fo` flag. + ## Building Sources See the full documentation for [Building and testing DXC](docs/BuildingAndTestingDXC.rst) for detailed instructions. 
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 01b30568a9..4541d08162 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -568,3 +568,12 @@ else() endif() string(REPLACE " " ";" LLVM_BINDINGS_LIST "${LLVM_BINDINGS}") + +# HLSL Change Begin - Metal IR Converter +find_package(MetalIRConverter) +if (METAL_IRCONVERTER_FOUND) + set(ENABLE_METAL_CODEGEN On) + message(STATUS "Enabling Metal Support") + add_definitions(-DENABLE_METAL_CODEGEN) +endif() +# HLSL Change End - Metal IR Converter diff --git a/cmake/modules/FindMetalIRConverter.cmake b/cmake/modules/FindMetalIRConverter.cmake new file mode 100644 index 0000000000..fc7df1d6cc --- /dev/null +++ b/cmake/modules/FindMetalIRConverter.cmake @@ -0,0 +1,16 @@ +find_path(METAL_IRCONVERTER_INCLUDE_DIR metal_irconverter.h + HINTS /usr/local/include/metal_irconverter + DOC "Path to metal IR converter headers" + ) + +find_library(METAL_IRCONVERTER_LIB NAMES metalirconverter + PATH_SUFFIXES lib + ) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(METAL_IRCONVERTER + REQUIRED_VARS METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) + +message(STATUS "Metal IR Converter Include Dir: ${METAL_IRCONVERTER_INCLUDE_DIR}") +message(STATUS "Metal IR Converter Library: ${METAL_IRCONVERTER_LIB}") +mark_as_advanced(METAL_IRCONVERTER_LIB METAL_IRCONVERTER_INCLUDE_DIR) diff --git a/docs/DXIL.rst b/docs/DXIL.rst index c3baf4e454..a1c5055085 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -225,10 +225,10 @@ DXIL uses 32-bit pointers in its representation. Out-of-bounds behavior ---------------------- -Indexable thread-local accesses are done via LLVM pointer and have C-like OOB semantics. -Groupshared accesses are done via LLVM pointer too. The origin of a groupshared pointer must be a single TGSM allocation. -If a groupshared pointer uses in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. 
-For groupshared pointer from regular GEP, OOB will has same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. +Indexable thread-local accesses are done via LLVM pointers and have C-like OOB semantics. +Groupshared accesses are done via LLVM pointers too. The origin of a groupshared pointer must be a single TGSM allocation. +If a groupshared pointer uses an in-bound GEP instruction, it should not OOB. The behavior for an OOB access for in-bound pointer is undefined. +For a groupshared pointer from regular GEP, OOB will have the same behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. Resource accesses keeps the same out-of-bounds behavior as DXBC. Loads return 0 for OOB accesses; OOB stores are silently dropped. @@ -1984,54 +1984,57 @@ The following LLVM instructions are valid in a DXIL program, with the specified .. hctdb_instrhelp.get_instrs_rst() .. INSTR-RST:BEGIN -============= ======================================================================= ================= -Instruction Action Operand overloads -============= ======================================================================= ================= -Ret returns a value (possibly void), from a function. 
vhfd1wil -Br branches (conditional or unconditional) -Switch performs a multiway switch -Add returns the sum of its two operands wil -FAdd returns the sum of its two operands hfd -Sub returns the difference of its two operands wil -FSub returns the difference of its two operands hfd -Mul returns the product of its two operands wil -FMul returns the product of its two operands hfd -UDiv returns the quotient of its two unsigned operands wil -SDiv returns the quotient of its two signed operands wil -FDiv returns the quotient of its two operands hfd -URem returns the remainder from the unsigned division of its two operands wil -SRem returns the remainder from the signed division of its two operands wil -FRem returns the remainder from the division of its two operands hfd -Shl shifts left (logical) wil -LShr shifts right (logical), with zero bit fill wil -AShr shifts right (arithmetic), with 'a' operand sign bit fill wil -And returns a bitwise logical and of its two operands 1wil -Or returns a bitwise logical or of its two operands 1wil -Xor returns a bitwise logical xor of its two operands 1wil -Alloca allocates memory on the stack frame of the currently executing function -Load reads from memory -Store writes to memory -GetElementPtr gets the address of a subelement of an aggregate value -AtomicCmpXchg atomically modifies memory -AtomicRMW atomically modifies memory -Trunc truncates an integer 1wil -ZExt zero extends an integer 1wil -SExt sign extends an integer 1wil -FPToUI converts a floating point to UInt hfd1wil -FPToSI converts a floating point to SInt hfd1wil -UIToFP converts a UInt to floating point hfd1wil -SIToFP converts a SInt to floating point hfd1wil -FPTrunc truncates a floating point hfd -FPExt extends a floating point hfd -BitCast performs a bit-preserving type cast hfd1wil -AddrSpaceCast casts a value addrspace -ICmp compares integers 1wil -FCmp compares floating points hfd -PHI is a PHI node instruction -Call calls a function -Select selects an 
instruction -ExtractValue extracts from aggregate -============= ======================================================================= ================= +============== ======================================================================= ================= +Instruction Action Operand overloads +============== ======================================================================= ================= +Ret returns a value (possibly void), from a function. vhfd1wil +Br branches (conditional or unconditional) +Switch performs a multiway switch +Add returns the sum of its two operands wil +FAdd returns the sum of its two operands hfd +Sub returns the difference of its two operands wil +FSub returns the difference of its two operands hfd +Mul returns the product of its two operands wil +FMul returns the product of its two operands hfd +UDiv returns the quotient of its two unsigned operands wil +SDiv returns the quotient of its two signed operands wil +FDiv returns the quotient of its two operands hfd +URem returns the remainder from the unsigned division of its two operands wil +SRem returns the remainder from the signed division of its two operands wil +FRem returns the remainder from the division of its two operands hfd +Shl shifts left (logical) wil +LShr shifts right (logical), with zero bit fill wil +AShr shifts right (arithmetic), with 'a' operand sign bit fill wil +And returns a bitwise logical and of its two operands 1wil +Or returns a bitwise logical or of its two operands 1wil +Xor returns a bitwise logical xor of its two operands 1wil +Alloca allocates memory on the stack frame of the currently executing function +Load reads from memory +Store writes to memory +GetElementPtr gets the address of a subelement of an aggregate value +AtomicCmpXchg atomically modifies memory +AtomicRMW atomically modifies memory +Trunc truncates an integer 1wil +ZExt zero extends an integer 1wil +SExt sign extends an integer 1wil +FPToUI converts a floating point to UInt hfd1wil 
+FPToSI converts a floating point to SInt hfd1wil +UIToFP converts a UInt to floating point hfd1wil +SIToFP converts a SInt to floating point hfd1wil +FPTrunc truncates a floating point hfd +FPExt extends a floating point hfd +BitCast performs a bit-preserving type cast hfd1wil +AddrSpaceCast casts a value addrspace +ICmp compares integers 1wil +FCmp compares floating points hfd +PHI is a PHI node instruction +Call calls a function +Select selects an instruction +ExtractElement extracts from vector +InsertElement inserts into vector +ShuffleVector Shuffle two vectors +ExtractValue extracts from aggregate +============== ======================================================================= ================= FAdd @@ -2369,6 +2372,53 @@ ID Name Description 255 SampleCmpBias samples a texture after applying the input bias to the mipmap level and compares a single component against the specified comparison value 256 StartVertexLocation returns the BaseVertexLocation from DrawIndexedInstanced or StartVertexLocation from DrawInstanced 257 StartInstanceLocation returns the StartInstanceLocation from Draw*Instanced +258 AllocateRayQuery2 allocates space for RayQuery and return handle +259 ReservedA0 reserved +260 ReservedA1 reserved +261 ReservedA2 reserved +262 HitObject_TraceRay Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject +263 HitObject_FromRayQuery Creates a new HitObject representing a committed hit from a RayQuery +264 HitObject_FromRayQueryWithAttrs Creates a new HitObject representing a committed hit from a RayQuery and committed attributes +265 HitObject_MakeMiss Creates a new HitObject representing a miss +266 HitObject_MakeNop Creates an empty nop HitObject +267 HitObject_Invoke Represents the invocation of the CH/MS shader represented by the HitObject +268 MaybeReorderThread Reorders the current thread +269 HitObject_IsMiss Returns `true` if the HitObject represents a miss +270 HitObject_IsHit Returns 
`true` if the HitObject is a NOP-HitObject +271 HitObject_IsNop Returns `true` if the HitObject represents a nop +272 HitObject_RayFlags Returns the ray flags set in the HitObject +273 HitObject_RayTMin Returns the TMin value set in the HitObject +274 HitObject_RayTCurrent Returns the current T value set in the HitObject +275 HitObject_WorldRayOrigin Returns the ray origin in world space +276 HitObject_WorldRayDirection Returns the ray direction in world space +277 HitObject_ObjectRayOrigin Returns the ray origin in object space +278 HitObject_ObjectRayDirection Returns the ray direction in object space +279 HitObject_ObjectToWorld3x4 Returns the object to world space transformation matrix in 3x4 form +280 HitObject_WorldToObject3x4 Returns the world to object space transformation matrix in 3x4 form +281 HitObject_GeometryIndex Returns the geometry index committed on hit +282 HitObject_InstanceIndex Returns the instance index committed on hit +283 HitObject_InstanceID Returns the instance id committed on hit +284 HitObject_PrimitiveIndex Returns the primitive index committed on hit +285 HitObject_HitKind Returns the HitKind of the hit +286 HitObject_ShaderTableIndex Returns the shader table index set for this HitObject +287 HitObject_SetShaderTableIndex Returns a HitObject with updated shader table index +288 HitObject_LoadLocalRootTableConstant Returns the root table constant for this HitObject and offset +289 HitObject_Attributes Returns the attributes set for this HitObject +290 ReservedB28 reserved +291 ReservedB29 reserved +292 ReservedB30 reserved +293 ReservedC0 reserved +294 ReservedC1 reserved +295 ReservedC2 reserved +296 ReservedC3 reserved +297 ReservedC4 reserved +298 ReservedC5 reserved +299 ReservedC6 reserved +300 ReservedC7 reserved +301 ReservedC8 reserved +302 ReservedC9 reserved +303 RawBufferVectorLoad reads from a raw buffer and structured buffer +304 RawBufferVectorStore writes to a RWByteAddressBuffer or RWStructuredBuffer === 
===================================================== ======================================================================================================================================================================================================================= @@ -3015,277 +3065,287 @@ The set of validation rules that are known to hold for a DXIL program is identif .. hctdb_instrhelp.get_valrules_rst() .. VALRULES-RST:BEGIN -========================================= ======================================================================================================================================================================================================================================================================================================== -Rule Code Description -========================================= ======================================================================================================================================================================================================================================================================================================== -BITCODE.VALID Module must be bitcode-valid -CONTAINER.PARTINVALID DXIL Container must not contain unknown parts -CONTAINER.PARTMATCHES DXIL Container Parts must match Module -CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module -CONTAINER.PARTREPEATED DXIL Container must have only one of each part type -CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader -DECL.ATTRSTRUCT Attributes parameter must be struct type -DECL.DXILFNEXTERN External function must be a DXIL function -DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types -DECL.EXTRAARGS Extra arguments not allowed for shader functions -DECL.FNATTRIBUTE Functions should only contain known function attributes -DECL.FNFLATTENPARAM Function parameters must not use struct 
types -DECL.FNISCALLED Functions can only be used by call instructions -DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record -DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type -DECL.NOTUSEDEXTERNAL External declaration should not be used -DECL.PARAMSTRUCT Callable function parameter must be struct type -DECL.PAYLOADSTRUCT Payload parameter must be struct type -DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures -DECL.RESOURCEINFNSIG Resources not allowed in function signatures -DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types -DECL.SHADERRETURNVOID Shader functions must return void -DECL.USEDEXTERNALFUNCTION External function must be used -DECL.USEDINTERNAL Internal declaration must be used -FLOW.DEADLOOP Loop must have break. -FLOW.FUNCTIONCALL Function with parameter is not permitted -FLOW.NORECURSION Recursion is not permitted. -FLOW.REDUCIBLE Execution flow must be reducible. -INSTR.ALLOWED Instructions must be of an allowed type. -INSTR.ATOMICCONST Constant destination to atomic. -INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. -INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. -INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. -INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' -INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). -INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. -INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. 
-INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant -INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. -INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. -INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. -INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature -INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed -INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. -INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. -INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. -INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer don't need 2 coordinates. -INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer require 2 coordinates. -INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. -INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. -INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. -INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. -INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. -INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. -INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. -INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. -INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. -INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. -INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. 
-INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. -INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. -INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. -INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. -INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. -INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. -INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. -INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. -INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. -INSTR.NOIDIVBYZERO No signed integer division by zero. -INSTR.NOINDEFINITEACOS No indefinite arccosine. -INSTR.NOINDEFINITEASIN No indefinite arcsine. -INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. -INSTR.NOINDEFINITELOG No indefinite logarithm. -INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. -INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. -INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. -INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. -INSTR.NOUDIVBYZERO No unsigned integer division by zero. -INSTR.OFFSETONUAVLOAD uav load don't support offset. -INSTR.OLOAD DXIL intrinsic overload must be valid. -INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. -INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. -INSTR.OPCONST DXIL intrinsic requires an immediate constant operand -INSTR.OPCONSTRANGE Constant values must be in-range for operation. -INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range -INSTR.PTRBITCAST Pointer type bitcast must be have same size. 
-INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. -INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. -INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. -INSTR.RESOURCECOORDINATEMISS coord uninitialized. -INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. -INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. -INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. -INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. -INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. -INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. -INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. -INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. -INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. -INSTR.RESOURCEOFFSETMISS offset uninitialized. -INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. -INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. -INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. -INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. -INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. 
-INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. -INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. -INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. -INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. -INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. -INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. -INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. -INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. -INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. -INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. -INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. -META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. -META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. -META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. -META.BRANCHFLATTEN Can't use branch and flatten attributes together. -META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components -META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. -META.COMPUTEWITHNODE Compute entry must not have node metadata -META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. -META.DENSERESIDS Resource identifiers must be zero-based and dense. -META.DUPLICATESYSVALUE System value may only appear once in signature -META.ENTRYFUNCTION entrypoint not found. 
-META.FLAGSUSAGE Flags must match usage. -META.FORCECASEONSWITCH Attribute forcecase only works for switch. -META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. -META.INTEGERINTERPMODE Interpolation mode on integer must be Constant -META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. -META.INTERPMODEVALID Interpolation mode must be valid -META.INVALIDCONTROLFLOWHINT Invalid control flow hint. -META.KNOWN Named metadata should be known -META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. -META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. -META.NOSEMANTICOVERLAP Semantics must not overlap -META.REQUIRED Required metadata missing. -META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. -META.SEMAKINDVALID Semantic kind must be valid -META.SEMANTICCOMPTYPE %0 must be %1. -META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index -META.SEMANTICLEN Semantic length must be at least 1 and at most 64. -META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location -META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 -META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. -META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. -META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value -META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together -META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size -META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. 
-META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned -META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds -META.SYSTEMVALUEROWS System value may only have 1 row -META.TARGET Target triple must be 'dxil-ms-dx' -META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. -META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. -META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. -META.USED All metadata must be used by dxil. -META.VALIDSAMPLERMODE Invalid sampler mode on sampler . -META.VALUERANGE Metadata value must be within range. -META.VERSIONSUPPORTED Version in metadata must be supported. -META.WELLFORMED Metadata must be well-formed in operand count and types. -SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. -SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. -SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes -SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow -SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap -SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes -SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. -SM.COMPLETEPOSITION Not all elements of SV_Position were written. -SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. -SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. 
-SM.CSNOSIGNATURES Compute shaders must not have shader signatures. -SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. -SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. -SM.DXILVERSION Target shader model requires specific Dxil Version -SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. -SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. -SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. -SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. -SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. -SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. -SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count -SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry -SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above -SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode -SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties -SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group -SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model -SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage -SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible -SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. 
Expected %3 rows and 1 column. -SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. -SM.INVALIDRESOURCEKIND Invalid resources kind. -SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. -SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. -SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. -SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. -SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. -SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. -SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. -SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. -SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. -SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. -SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. -SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists -SM.NAME Target shader model name must be known -SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. -SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. 
-SM.OPCODE Opcode must be defined in target shader model -SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function -SM.OPERAND Operand must be defined in target shader model. -SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. -SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . -SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. -SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). -SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. -SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. -SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. -SM.PSTARGETCOL0 SV_Target packed location must start at column 0. -SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. -SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. -SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. -SM.RESOURCERANGEOVERLAP Resource ranges must not overlap -SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. -SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. -SM.SEMANTIC Semantic must be defined in target shader model -SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. -SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. 
-SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. -SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. -SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. -SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. -SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. -SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature -SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 -SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. -SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max -SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min -SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. -SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. -SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders -SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range -SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. -SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. -SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. -SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] -SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. -TYPES.DEFINED Type must be defined based on DXIL primitives -TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. 
-TYPES.INTWIDTH Int type must be of valid width -TYPES.NOMULTIDIM Only one dimension allowed for array type. -TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. -TYPES.NOVECTOR Vector types must not be present -========================================= ======================================================================================================================================================================================================================================================================================================== +===================================================== ======================================================================================================================================================================================================================================================================================================== +Rule Code Description +===================================================== ======================================================================================================================================================================================================================================================================================================== +BITCODE.VALID Module must be bitcode-valid +CONTAINER.CONTENTINVALID DXIL Container Content is well-formed +CONTAINER.CONTENTMATCHES DXIL Container Content must match Module +CONTAINER.PARTINVALID DXIL Container must not contain unknown parts +CONTAINER.PARTMATCHES DXIL Container Parts must match Module +CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module +CONTAINER.PARTREPEATED DXIL Container must have only one of each part type +CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader +CONTAINER.UNUSEDITEMINTABLE Items in Table must be used +DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and 
RayQueryFlags for AllocateRayQuery2 must be constant +DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant +DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument +DECL.ATTRSTRUCT Attributes parameter must be struct type +DECL.DXILFNEXTERN External function must be a DXIL function +DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types +DECL.EXTRAARGS Extra arguments not allowed for shader functions +DECL.FNATTRIBUTE Functions should only contain known function attributes +DECL.FNFLATTENPARAM Function parameters must not use struct types +DECL.FNISCALLED Functions can only be used by call instructions +DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record +DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type +DECL.NOTUSEDEXTERNAL External declaration should not be used +DECL.PARAMSTRUCT Callable function parameter must be struct type +DECL.PAYLOADSTRUCT Payload parameter must be struct type +DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures +DECL.RESOURCEINFNSIG Resources not allowed in function signatures +DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types +DECL.SHADERRETURNVOID Shader functions must return void +DECL.USEDEXTERNALFUNCTION External function must be used +DECL.USEDINTERNAL Internal declaration must be used +FLOW.DEADLOOP Loop must have break. +FLOW.FUNCTIONCALL Function with parameter is not permitted +FLOW.NORECURSION Recursion is not permitted. +FLOW.REDUCIBLE Execution flow must be reducible. +INSTR.ALLOWED Instructions must be of an allowed type. +INSTR.ATOMICCONST Constant destination to atomic. +INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. 
+INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. +INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. +INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' +INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). +INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. +INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. +INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant +INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. +INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. +INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. +INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature +INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed +INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. +INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. +INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. +INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. +INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. +INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. +INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. +INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. +INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. 
+INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. +INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. +INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. +INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. +INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. +INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. +INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. +INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. +INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. +INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. +INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. +INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. +INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. +INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. +INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. +INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. +INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. +INSTR.NOIDIVBYZERO No signed integer division by zero. +INSTR.NOINDEFINITEACOS No indefinite arccosine. +INSTR.NOINDEFINITEASIN No indefinite arcsine. +INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. +INSTR.NOINDEFINITELOG No indefinite logarithm. 
+INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. +INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. +INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. +INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. +INSTR.NOUDIVBYZERO No unsigned integer division by zero. +INSTR.OFFSETONUAVLOAD uav load don't support offset. +INSTR.OLOAD DXIL intrinsic overload must be valid. +INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. +INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. +INSTR.OPCONST DXIL intrinsic requires an immediate constant operand +INSTR.OPCONSTRANGE Constant values must be in-range for operation. +INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range +INSTR.PTRBITCAST Pointer type bitcast must be have same size. +INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. +INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. +INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. +INSTR.RESOURCECOORDINATEMISS coord uninitialized. +INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. +INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. +INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. +INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. +INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. +INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. 
+INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. +INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. +INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. +INSTR.RESOURCEOFFSETMISS offset uninitialized. +INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. +INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. +INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. +INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. +INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. +INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. +INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. +INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. +INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. +INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. +INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. +INSTR.UNDEFHITOBJECT HitObject is undef. +INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. +INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. +INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. +INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. 
+INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. +META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. +META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. +META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. +META.BRANCHFLATTEN Can't use branch and flatten attributes together. +META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components +META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. +META.COMPUTEWITHNODE Compute entry must not have node metadata +META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. +META.DENSERESIDS Resource identifiers must be zero-based and dense. +META.DUPLICATESYSVALUE System value may only appear once in signature +META.ENTRYFUNCTION entrypoint not found. +META.FLAGSUSAGE Flags must match usage. +META.FORCECASEONSWITCH Attribute forcecase only works for switch. +META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. +META.INTEGERINTERPMODE Interpolation mode on integer must be Constant +META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. +META.INTERPMODEVALID Interpolation mode must be valid +META.INVALIDCONTROLFLOWHINT Invalid control flow hint. +META.KNOWN Named metadata should be known +META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. +META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. +META.NOSEMANTICOVERLAP Semantics must not overlap +META.REQUIRED Required metadata missing. +META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. 
+META.SEMAKINDVALID Semantic kind must be valid +META.SEMANTICCOMPTYPE %0 must be %1. +META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index +META.SEMANTICLEN Semantic length must be at least 1 and at most 64. +META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location +META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 +META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. +META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. +META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value +META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together +META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size +META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. +META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned +META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds +META.SYSTEMVALUEROWS System value may only have 1 row +META.TARGET Target triple must be 'dxil-ms-dx' +META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. +META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. +META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. +META.USED All metadata must be used by dxil. +META.VALIDSAMPLERMODE Invalid sampler mode on sampler . +META.VALUERANGE Metadata value must be within range. +META.VERSIONSUPPORTED Version in metadata must be supported. +META.WELLFORMED Metadata must be well-formed in operand count and types. +SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. 
+SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. +SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes +SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow +SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap +SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes +SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. +SM.COMPLETEPOSITION Not all elements of SV_Position were written. +SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. +SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. +SM.CSNOSIGNATURES Compute shaders must not have shader signatures. +SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. +SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. +SM.DXILVERSION Target shader model requires specific Dxil Version +SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. +SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. +SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. +SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. +SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. +SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. 
+SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count +SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry +SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above +SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode +SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties +SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group +SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model +SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage +SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible +SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. +SM.INVALIDRESOURCEKIND Invalid resources kind. +SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. +SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. +SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. +SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. +SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. 
+SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. +SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. +SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. +SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. +SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. +SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. +SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists +SM.NAME Target shader model name must be known +SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. +SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. +SM.OPCODE Opcode must be defined in target shader model +SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function +SM.OPERAND Operand must be defined in target shader model. +SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. +SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . +SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. 
+SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version +SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). +SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. +SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. +SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. +SM.PSTARGETCOL0 SV_Target packed location must start at column 0. +SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. +SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. +SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. +SM.RESOURCERANGEOVERLAP Resource ranges must not overlap +SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. +SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. +SM.SEMANTIC Semantic must be defined in target shader model +SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. +SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. +SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. +SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. +SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. 
Line output is not compatible with the Tri domain. +SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. +SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. +SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature +SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 +SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. +SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max +SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min +SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. +SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. +SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders +SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range +SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. +SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. +SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. +SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] +SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. +TYPES.DEFINED Type must be defined based on DXIL primitives +TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. +TYPES.INTWIDTH Int type must be of valid width +TYPES.NOMULTIDIM Only one dimension allowed for array type. +TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. 
+TYPES.NOVECTOR Vector types must not be present +===================================================== ======================================================================================================================================================================================================================================================================================================== .. VALRULES-RST:END @@ -3294,9 +3354,9 @@ Modules and Linking =================== HLSL has linking capabilities to enable third-party libraries. The linking step happens before shader DXIL is given to the driver compilers. -Experimental library generation is added in DXIL1.1. A library could be created by compile with lib_6_1 profile. -A library is a dxil container like the compile result of other shader profiles. The difference is library will keep information for linking like resource link info and entry function signatures. -Library support is not part of DXIL spec. Only requirement is linked shader must be valid DXIL. +Experimental library generation is added in DXIL1.1. A library could be created by compiling with the lib_6_1 profile. +A library is a dxil container like the compile result of other shader profiles. The difference is a library will keep information for linking like resource link info and entry function signatures. +Library support is not part of the DXIL spec. The only requirement is that the linked shader must be valid DXIL. Additional Notes diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index c30286e4e6..b5e9c05079 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -282,7 +282,7 @@ Right now the following ```` are supported: Need ``SPV_KHR_device_group`` extension. * ``ViewportMaskNV``: The GLSL equivalent is ``gl_ViewportMask``. -Please see Vulkan spec. `14.6. Built-In Variables `_ +Please see Vulkan spec. `15.9. Built-In Variables `_ for detailed explanation of these builtins. 
Supported extensions @@ -312,13 +312,15 @@ Supported extensions * SPV_NV_mesh_shader * SPV_KHR_ray_query * SPV_EXT_shader_image_int64 -* SPV_KHR_fragment_shading_barycentric +* SPV_KHR_fragment_shader_barycentric * SPV_KHR_physical_storage_buffer * SPV_KHR_vulkan_memory_model +* SPV_KHR_compute_shader_derivatives * SPV_NV_compute_shader_derivatives * SPV_KHR_maximal_reconvergence * SPV_KHR_float_controls * SPV_NV_shader_subgroup_partitioned +* SPV_KHR_quad_control Vulkan specific attributes -------------------------- @@ -446,7 +448,7 @@ environment (hence SPIR-V version) and SPIR-V extension control: ``-fspv-target-env=`` accepts a Vulkan target environment (see ``-help`` for supported values). If such an option is not given, the CodeGen defaults to ``vulkan1.0``. When targeting ``vulkan1.0``, trying to use features that are only -available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsics`_, +available in Vulkan 1.1 (SPIR-V 1.3), like `Shader Model 6.0 wave intrinsic `_, will trigger a compiler error. If ``-fspv-extension=`` is not specified, the CodeGen will select suitable @@ -494,7 +496,7 @@ Specifically, we need to legalize the following HLSL source code patterns: Legalization transformations will not run unless the above patterns are encountered in the source code. -For more details, please see the `SPIR-V cookbook `_, +For more details, please see the `SPIR-V cookbook `_, which contains examples of what HLSL code patterns will be accepted and generate valid SPIR-V for Vulkan. @@ -561,7 +563,7 @@ So if you want to run loop unrolling additionally after the default optimization recipe, you can specify ``-Oconfig=-O,--loop-unroll``. For the whole list of accepted passes and details about each one, please see -``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. +``spirv-opt``'s help manual (``spirv-opt --help``), or the SPIRV-Tools `optimizer header file `_. 
Validation ~~~~~~~~~~ @@ -640,7 +642,7 @@ HLSL Semantic HLSL semantic strings are by default not emitted into the SPIR-V binary module. If you need them, by specifying ``-fspv-reflect``, the compiler will use -the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ +the ``Op*DecorateStringGOOGLE`` instruction in `SPV_GOOGLE_hlsl_funtionality1 `_ extension to emit them. HLSL User Types @@ -661,7 +663,7 @@ Counter buffers for RW/Append/Consume StructuredBuffer The association between a counter buffer and its main RW/Append/Consume StructuredBuffer is conveyed by ``OpDecorateId HLSLCounterBufferGOOGLE `` instruction from the -`SPV_GOOGLE_hlsl_funtionality1 `_ +`SPV_GOOGLE_hlsl_funtionality1 `_ extension. This information is by default missing; you need to specify ``-fspv-reflect`` to direct the compiler to emit them. @@ -911,7 +913,7 @@ For example, RWTexture2D Tex2; // Works like before -``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. +``rgba8`` means ``Rgba8`` `SPIR-V Image Format `_. The following table lists the mapping between ``FORMAT`` of ``[[vk::image_format("FORMAT")]]`` and its corresponding SPIR-V Image Format. @@ -994,7 +996,7 @@ Please see the following sections for the details of each type. As a summary: =========================== ================== ================================ ==================== ================= To know more about the Vulkan buffer types, please refer to the Vulkan spec -`13.1 Descriptor Types `_. +`14.1 Descriptor Types `_. Memory layout rules ~~~~~~~~~~~~~~~~~~~ @@ -1004,7 +1006,7 @@ right now: 1. Vector-relaxed OpenGL ``std140`` for uniform buffers and vector-relaxed OpenGL ``std430`` for storage buffers: these rules satisfy Vulkan `"Standard - Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, + Uniform Buffer Layout" and "Standard Storage Buffer Layout" `_, respectively. They are the default. 2. 
DirectX memory layout rules for uniform buffers and storage buffers: @@ -1027,7 +1029,7 @@ In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL alignment: 1. The alignment of a vector type is set to be the alignment of its element type -2. If the above causes an `improper straddle `_, +2. If the above causes an `improper straddle `_, the alignment will be set to 16 bytes. As an exmaple, for the following HLSL definition: @@ -1471,7 +1473,7 @@ Without hints from the developer, the compiler will try its best to map semantics to ``Location`` numbers. However, there is no single rule for this mapping; semantic strings should be handled case by case. -Firstly, under certain `SigPoints `_, +Firstly, under certain `SigPoints `_, some system-value (SV) semantic strings will be translated into SPIR-V ``BuiltIn`` decorations: @@ -1655,7 +1657,7 @@ some system-value (SV) semantic strings will be translated into SPIR-V | +-------------+----------------------------------------+-----------------------+-----------------------------+ | | MSOut | ``PrimitiveShadingRateKHR`` | N/A | ``FragmentShadingRate`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ -| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT `` | +| SV_CullPrimitive | MSOut | ``CullPrimitiveEXT`` | N/A | ``MeshShadingEXT`` | +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+ @@ -3596,8 +3598,8 @@ Mesh and Amplification Shaders | Amplification shaders corresponds to Task Shaders in Vulkan. 
| | Refer to following HLSL and SPIR-V specs for details: -| https://docs.microsoft.com/ -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc +| https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_mesh_shader.asciidoc | | This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan. @@ -3704,8 +3706,8 @@ Raytracing in Vulkan and SPIRV | SPIR-V codegen is currently supported for NVIDIA platforms via SPV_NV_ray_tracing extension or | on other platforms via provisional cross vendor SPV_KHR_ray_tracing extension. | SPIR-V specification for reference: -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_ray_tracing.asciidoc -| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_ray_tracing.asciidoc +| https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_tracing.asciidoc | Vulkan ray tracing samples: | https://developer.nvidia.com/rtx/raytracing/vkray @@ -3868,7 +3870,7 @@ Ray Query in SPIRV ~~~~~~~~~~~~~~~~~~ RayQuery SPIR-V codegen is currently supported via SPV_KHR_ray_query extension SPIR-V specification for reference: -https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_ray_query.asciidoc +https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_ray_query.asciidoc Object Type ~~~~~~~~~~~ @@ -4007,6 +4009,8 @@ Quad ``QuadReadAcrossX()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossY()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap`` Quad ``QuadReadLaneAt()`` ``OpGroupNonUniformQuadBroadcast`` +Quad ``QuadAny()`` ``OpGroupNonUniformQuadAnyKHR`` +Quad ``QuadAll()`` ``OpGroupNonUniformQuadAllKHR`` N/A 
``WaveMatch()`` ``OpGroupNonUniformPartitionNV`` Multiprefix ``WaveMultiPrefixSum()`` ``OpGroupNonUniform*Add`` ``PartitionedExclusiveScanNV`` Multiprefix ``WaveMultiPrefixProduct()`` ``OpGroupNonUniform*Mul`` ``PartitionedExclusiveScanNV`` @@ -4015,6 +4019,11 @@ Multiprefix ``WaveMultiPrefixBitOr()`` ``OpGroupNonUniformLogicalOr`` ` Multiprefix ``WaveMultiPrefixBitXor()`` ``OpGroupNonUniformLogicalXor`` ``PartitionedExclusiveScanNV`` ============= ============================ =================================== ============================== +``QuadAny`` and ``QuadAll`` will use the ``OpGroupNonUniformQuadAnyKHR`` and +``OpGroupNonUniformQuadAllKHR`` instructions if the ``SPV_KHR_quad_control`` +extension is enabled. If it is not, they will fall back to constructing the +value using multiple calls to ``OpGroupNonUniformQuadBroadcast``. + The Implicit ``vk`` Namespace ============================= @@ -4081,7 +4090,7 @@ This intrinsic funcion has the following signature: uint64_t ReadClock(in uint scope); -It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. +It translates to performing ``OpReadClockKHR`` defined in `VK_KHR_shader_clock `_. One can use the predefined scopes in the ``vk`` namepsace to specify the scope argument. For example: @@ -4091,11 +4100,11 @@ For example: RawBufferLoad and RawBufferStore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Vulkan extension `VK_KHR_buffer_device_address `_ +The Vulkan extension `VK_KHR_buffer_device_address `_ supports getting the 64-bit address of a buffer and passing it to SPIR-V as a Uniform buffer. SPIR-V can use the address to load and store data without a descriptor. 
We add the following intrinsic functions to expose a subset of the -`VK_KHR_buffer_device_address `_ +`VK_KHR_buffer_device_address `_ and `SPV_KHR_physical_storage_buffer `_ functionality to HLSL: diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 54a521dd13..0e71067798 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 54a521dd130ae1b2f38fef79b09515702d135bdd +Subproject commit 0e710677989b4326ac974fd80c5308191ed80965 diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index f289d047f4..4bd1536ed7 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit f289d047f49fb60488301ec62bafab85573668cc +Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index f8d5b740f7..8c73328fbd 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -147,12 +147,19 @@ const unsigned kMaxMSTotalSigRows = 32; const unsigned kMaxMSSMSize = 1024 * 28; const unsigned kMinWaveSize = 4; const unsigned kMaxWaveSize = 128; +const unsigned kDefaultMaxVectorLength = 4; +const unsigned kSM69MaxVectorLength = 1024; const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +/* hctdb_instrhelp.get_max_oload_dims()*/ +// OLOAD_DIMS-TEXT:BEGIN +const unsigned kDxilMaxOloadDims = 2; +// OLOAD_DIMS-TEXT:END + enum class ComponentType : uint32_t { Invalid = 0, I1, @@ -463,6 +470,11 @@ inline bool IsTBuffer(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::TBuffer; } +inline bool IsCTBuffer(DXIL::ResourceKind ResourceKind) { + return ResourceKind == DXIL::ResourceKind::CBuffer || + ResourceKind == DXIL::ResourceKind::TBuffer; +} + /// Whether the resource kind is a FeedbackTexture. 
inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::FeedbackTexture2D || @@ -490,37 +502,9 @@ enum class OpCode : unsigned { ReservedA0 = 259, // reserved ReservedA1 = 260, // reserved ReservedA2 = 261, // reserved - ReservedB0 = 262, // reserved - ReservedB1 = 263, // reserved - ReservedB10 = 272, // reserved - ReservedB11 = 273, // reserved - ReservedB12 = 274, // reserved - ReservedB13 = 275, // reserved - ReservedB14 = 276, // reserved - ReservedB15 = 277, // reserved - ReservedB16 = 278, // reserved - ReservedB17 = 279, // reserved - ReservedB18 = 280, // reserved - ReservedB19 = 281, // reserved - ReservedB2 = 264, // reserved - ReservedB20 = 282, // reserved - ReservedB21 = 283, // reserved - ReservedB22 = 284, // reserved - ReservedB23 = 285, // reserved - ReservedB24 = 286, // reserved - ReservedB25 = 287, // reserved - ReservedB26 = 288, // reserved - ReservedB27 = 289, // reserved ReservedB28 = 290, // reserved ReservedB29 = 291, // reserved - ReservedB3 = 265, // reserved ReservedB30 = 292, // reserved - ReservedB4 = 266, // reserved - ReservedB5 = 267, // reserved - ReservedB6 = 268, // reserved - ReservedB7 = 269, // reserved - ReservedB8 = 270, // reserved - ReservedB9 = 271, // reserved ReservedC0 = 293, // reserved ReservedC1 = 294, // reserved ReservedC2 = 295, // reserved @@ -888,8 +872,11 @@ enum class OpCode : unsigned { GetDimensions = 72, // gets texture size information RawBufferLoad = 139, // reads from a raw buffer and structured buffer RawBufferStore = 140, // writes to a RWByteAddressBuffer or RWStructuredBuffer - TextureLoad = 66, // reads texel data without any filtering or sampling - TextureStore = 67, // reads texel data without any filtering or sampling + RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer + RawBufferVectorStore = + 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer + TextureLoad = 66, // reads texel data without any filtering 
or sampling + TextureStore = 67, // reads texel data without any filtering or sampling TextureStoreSample = 225, // stores texel data at specified sample index // Sampler Feedback @@ -902,6 +889,49 @@ enum class OpCode : unsigned { WriteSamplerFeedbackLevel = 176, // updates a feedback texture for a sampling // operation with a mipmap-level offset + // Shader Execution Reordering + HitObject_Attributes = 289, // Returns the attributes set for this HitObject + HitObject_FromRayQuery = 263, // Creates a new HitObject representing a + // committed hit from a RayQuery + HitObject_FromRayQueryWithAttrs = + 264, // Creates a new HitObject representing a committed hit from a + // RayQuery and committed attributes + HitObject_GeometryIndex = 281, // Returns the geometry index committed on hit + HitObject_HitKind = 285, // Returns the HitKind of the hit + HitObject_InstanceID = 283, // Returns the instance id committed on hit + HitObject_InstanceIndex = 282, // Returns the instance index committed on hit + HitObject_Invoke = 267, // Represents the invocation of the CH/MS shader + // represented by the HitObject + HitObject_IsHit = 270, // Returns `true` if the HitObject is a NOP-HitObject + HitObject_IsMiss = 269, // Returns `true` if the HitObject represents a miss + HitObject_IsNop = 271, // Returns `true` if the HitObject represents a nop + HitObject_LoadLocalRootTableConstant = + 288, // Returns the root table constant for this HitObject and offset + HitObject_MakeMiss = 265, // Creates a new HitObject representing a miss + HitObject_MakeNop = 266, // Creates an empty nop HitObject + HitObject_ObjectRayDirection = + 278, // Returns the ray direction in object space + HitObject_ObjectRayOrigin = 277, // Returns the ray origin in object space + HitObject_ObjectToWorld3x4 = 279, // Returns the object to world space + // transformation matrix in 3x4 form + HitObject_PrimitiveIndex = + 284, // Returns the primitive index committed on hit + HitObject_RayFlags = 272, // 
Returns the ray flags set in the HitObject + HitObject_RayTCurrent = + 274, // Returns the current T value set in the HitObject + HitObject_RayTMin = 273, // Returns the TMin value set in the HitObject + HitObject_SetShaderTableIndex = + 287, // Returns a HitObject with updated shader table index + HitObject_ShaderTableIndex = + 286, // Returns the shader table index set for this HitObject + HitObject_TraceRay = 262, // Analogous to TraceRay but without invoking CH/MS + // and returns the intermediate state as a HitObject + HitObject_WorldRayDirection = 276, // Returns the ray direction in world space + HitObject_WorldRayOrigin = 275, // Returns the ray origin in world space + HitObject_WorldToObject3x4 = 280, // Returns the world to object space + // transformation matrix in 3x4 form + MaybeReorderThread = 268, // Reorders the current thread + // Synchronization AtomicBinOp = 78, // performs an atomic operation on two operands AtomicCompareExchange = 79, // atomic compare and exchange to memory @@ -1030,7 +1060,7 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes = 303 // exclusive last value of enumeration + NumOpCodes = 305 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1264,6 +1294,8 @@ enum class OpCodeClass : unsigned { GetDimensions, RawBufferLoad, RawBufferStore, + RawBufferVectorLoad, + RawBufferVectorStore, TextureLoad, TextureStore, TextureStoreSample, @@ -1274,6 +1306,21 @@ enum class OpCodeClass : unsigned { WriteSamplerFeedbackGrad, WriteSamplerFeedbackLevel, + // Shader Execution Reordering + HitObject_Attributes, + HitObject_FromRayQuery, + HitObject_FromRayQueryWithAttrs, + HitObject_Invoke, + HitObject_LoadLocalRootTableConstant, + HitObject_MakeMiss, + HitObject_MakeNop, + HitObject_SetShaderTableIndex, + HitObject_StateMatrix, + HitObject_StateScalar, + HitObject_StateVector, + HitObject_TraceRay, + MaybeReorderThread, + // Synchronization AtomicBinOp, AtomicCompareExchange, @@ 
-1338,7 +1385,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 175 // exclusive last value of enumeration + NumOpClasses = 190 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END @@ -1397,6 +1444,12 @@ const unsigned kRawBufferLoadElementOffsetOpIdx = 3; const unsigned kRawBufferLoadMaskOpIdx = 4; const unsigned kRawBufferLoadAlignmentOpIdx = 5; +// RawBufferVectorLoad. +const unsigned kRawBufferVectorLoadHandleOpIdx = 1; +const unsigned kRawBufferVectorLoadIndexOpIdx = 2; +const unsigned kRawBufferVectorLoadElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorLoadAlignmentOpIdx = 4; + // RawBufferStore const unsigned kRawBufferStoreHandleOpIdx = 1; const unsigned kRawBufferStoreIndexOpIdx = 2; @@ -1406,7 +1459,14 @@ const unsigned kRawBufferStoreVal1OpIdx = 5; const unsigned kRawBufferStoreVal2OpIdx = 6; const unsigned kRawBufferStoreVal3OpIdx = 7; const unsigned kRawBufferStoreMaskOpIdx = 8; -const unsigned kRawBufferStoreAlignmentOpIdx = 8; +const unsigned kRawBufferStoreAlignmentOpIdx = 9; + +// RawBufferVectorStore +const unsigned kRawBufferVectorStoreHandleOpIdx = 1; +const unsigned kRawBufferVectorStoreIndexOpIdx = 2; +const unsigned kRawBufferVectorStoreElementOffsetOpIdx = 3; +const unsigned kRawBufferVectorStoreValOpIdx = 4; +const unsigned kRawBufferVectorStoreAlignmentOpIdx = 5; // TextureStore. 
const unsigned kTextureStoreHandleOpIdx = 1; @@ -1820,7 +1880,7 @@ enum class RayFlag : uint32_t { CullNonOpaque = 0x80, SkipTriangles = 0x100, SkipProceduralPrimitives = 0x200, - ForceOMM2State = 0x400, // Force 2-state in Opacity Micromaps + ForceOMM2State = 0x400 }; // Corresponds to RAYQUERY_FLAG_* in HLSL @@ -1869,7 +1929,9 @@ enum class BarrierSemanticFlag : uint32_t { GroupSync = 0x00000001, // GROUP_SYNC GroupScope = 0x00000002, // GROUP_SCOPE DeviceScope = 0x00000004, // DEVICE_SCOPE - ValidMask = 0x00000007, + LegacyFlags = 0x00000007, + ReorderScope = 0x00000008, // REORDER_SCOPE + ValidMask = 0x0000000F, GroupFlags = GroupSync | GroupScope, }; diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 11ab8e3b8d..a99c5360d4 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -645,6 +645,42 @@ struct LlvmInst_VAArg { bool isAllowed() const { return false; } }; +/// This instruction extracts from vector +struct LlvmInst_ExtractElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ExtractElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::ExtractElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction inserts into vector +struct LlvmInst_InsertElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_InsertElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::InsertElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction Shuffle two vectors +struct LlvmInst_ShuffleVector { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ShuffleVector(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == 
llvm::Instruction::ShuffleVector; + } + // Validation support + bool isAllowed() const { return true; } +}; + /// This instruction extracts from aggregate struct LlvmInst_ExtractValue { llvm::Instruction *Instr; @@ -8813,5 +8849,1074 @@ struct DxilInst_AllocateRayQuery2 { llvm::APInt(32, (uint64_t)val))); } }; + +/// This instruction Analogous to TraceRay but without invoking CH/MS and +/// returns the intermediate state as a HitObject +struct DxilInst_HitObject_TraceRay { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_TraceRay(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_TraceRay); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (16 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_accelerationStructure = 1, + arg_rayFlags = 2, + arg_instanceInclusionMask = 3, + arg_rayContributionToHitGroupIndex = 4, + arg_multiplierForGeometryContributionToHitGroupIndex = 5, + arg_missShaderIndex = 6, + arg_Origin_X = 7, + arg_Origin_Y = 8, + arg_Origin_Z = 9, + arg_TMin = 10, + arg_Direction_X = 11, + arg_Direction_Y = 12, + arg_Direction_Z = 13, + arg_TMax = 14, + arg_payload = 15, + }; + // Accessors + llvm::Value *get_accelerationStructure() const { + return Instr->getOperand(1); + } + void set_accelerationStructure(llvm::Value *val) { + Instr->setOperand(1, val); + } + llvm::Value *get_rayFlags() const { return Instr->getOperand(2); } + void set_rayFlags(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_instanceInclusionMask() const { + return Instr->getOperand(3); + } + void set_instanceInclusionMask(llvm::Value *val) { + Instr->setOperand(3, val); + } + llvm::Value *get_rayContributionToHitGroupIndex() 
const { + return Instr->getOperand(4); + } + void set_rayContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(4, val); + } + llvm::Value *get_multiplierForGeometryContributionToHitGroupIndex() const { + return Instr->getOperand(5); + } + void set_multiplierForGeometryContributionToHitGroupIndex(llvm::Value *val) { + Instr->setOperand(5, val); + } + llvm::Value *get_missShaderIndex() const { return Instr->getOperand(6); } + void set_missShaderIndex(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(7); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(8); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(9); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(10); } + void set_TMin(llvm::Value *val) { Instr->setOperand(10, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(11); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(12); } + void set_Direction_Y(llvm::Value *val) { Instr->setOperand(12, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(13); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(13, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(14); } + void set_TMax(llvm::Value *val) { Instr->setOperand(14, val); } + llvm::Value *get_payload() const { return Instr->getOperand(15); } + void set_payload(llvm::Value *val) { Instr->setOperand(15, val); } +}; + +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery +struct DxilInst_HitObject_FromRayQuery { + llvm::Instruction *Instr; + // Construction and identification + 
DxilInst_HitObject_FromRayQuery(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQuery); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Creates a new HitObject representing a committed hit from a +/// RayQuery and committed attributes +struct DxilInst_HitObject_FromRayQueryWithAttrs { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_FromRayQueryWithAttrs(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_FromRayQueryWithAttrs); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_rayQueryHandle = 1, + arg_HitKind = 2, + arg_CommittedAttribs = 3, + }; + // Accessors + llvm::Value *get_rayQueryHandle() const { return Instr->getOperand(1); } + void set_rayQueryHandle(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_HitKind() const { return Instr->getOperand(2); } + void set_HitKind(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_CommittedAttribs() const { return Instr->getOperand(3); } + void set_CommittedAttribs(llvm::Value *val) { Instr->setOperand(3, val); 
} +}; + +/// This instruction Creates a new HitObject representing a miss +struct DxilInst_HitObject_MakeMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_MakeMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (11 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_RayFlags = 1, + arg_MissShaderIndex = 2, + arg_Origin_X = 3, + arg_Origin_Y = 4, + arg_Origin_Z = 5, + arg_TMin = 6, + arg_Direction_X = 7, + arg_Direction_Y = 8, + arg_Direction_Z = 9, + arg_TMax = 10, + }; + // Accessors + llvm::Value *get_RayFlags() const { return Instr->getOperand(1); } + void set_RayFlags(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_MissShaderIndex() const { return Instr->getOperand(2); } + void set_MissShaderIndex(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_Origin_X() const { return Instr->getOperand(3); } + void set_Origin_X(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_Origin_Y() const { return Instr->getOperand(4); } + void set_Origin_Y(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_Origin_Z() const { return Instr->getOperand(5); } + void set_Origin_Z(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_TMin() const { return Instr->getOperand(6); } + void set_TMin(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_Direction_X() const { return Instr->getOperand(7); } + void set_Direction_X(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_Direction_Y() const { return Instr->getOperand(8); } + void set_Direction_Y(llvm::Value *val) { 
Instr->setOperand(8, val); } + llvm::Value *get_Direction_Z() const { return Instr->getOperand(9); } + void set_Direction_Z(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_TMax() const { return Instr->getOperand(10); } + void set_TMax(llvm::Value *val) { Instr->setOperand(10, val); } +}; + +/// This instruction Creates an empty nop HitObject +struct DxilInst_HitObject_MakeNop { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_MakeNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_MakeNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (1 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } +}; + +/// This instruction Represents the invocation of the CH/MS shader represented +/// by the HitObject +struct DxilInst_HitObject_Invoke { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Invoke(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_Invoke); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_payload = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_payload() const { return Instr->getOperand(2); } + void set_payload(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Reorders the 
current thread +struct DxilInst_MaybeReorderThread { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MaybeReorderThread(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::MaybeReorderThread); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_coherenceHint = 2, + arg_numCoherenceHintBitsFromLSB = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_coherenceHint() const { return Instr->getOperand(2); } + void set_coherenceHint(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_numCoherenceHintBitsFromLSB() const { + return Instr->getOperand(3); + } + void set_numCoherenceHintBitsFromLSB(llvm::Value *val) { + Instr->setOperand(3, val); + } +}; + +/// This instruction Returns `true` if the HitObject represents a miss +struct DxilInst_HitObject_IsMiss { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsMiss(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsMiss); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void 
set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject is a NOP-HitObject +struct DxilInst_HitObject_IsHit { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsHit(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsHit); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns `true` if the HitObject represents a nop +struct DxilInst_HitObject_IsNop { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_IsNop(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_IsNop); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray flags set in the HitObject +struct DxilInst_HitObject_RayFlags { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayFlags(llvm::Instruction *pInstr) : 
Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayFlags); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the TMin value set in the HitObject +struct DxilInst_HitObject_RayTMin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTMin(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_RayTMin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the current T value set in the HitObject +struct DxilInst_HitObject_RayTCurrent { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_RayTCurrent(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_RayTCurrent); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != 
llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the ray origin in world space +struct DxilInst_HitObject_WorldRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in world space +struct DxilInst_HitObject_WorldRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { 
+ return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldRayDirection); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray origin in object space +struct DxilInst_HitObject_ObjectRayOrigin { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayOrigin(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayOrigin); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return 
Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the ray direction in object space +struct DxilInst_HitObject_ObjectRayDirection { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ObjectRayDirection(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectRayDirection); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_component = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_component() const { return Instr->getOperand(2); } + void set_component(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_component_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_component_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the object to world space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_ObjectToWorld3x4 { + llvm::Instruction *Instr; + // Construction and identification + 
DxilInst_HitObject_ObjectToWorld3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ObjectToWorld3x4); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the world to object space transformation matrix in +/// 3x4 form +struct DxilInst_HitObject_WorldToObject3x4 { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_WorldToObject3x4(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_WorldToObject3x4); + } + // 
Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_row = 2, + arg_col = 3, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_row() const { return Instr->getOperand(2); } + void set_row(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_row_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(2)) + ->getZExtValue()); + } + void set_row_val(int32_t val) { + Instr->setOperand(2, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_col() const { return Instr->getOperand(3); } + void set_col(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_col_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(3)) + ->getZExtValue()); + } + void set_col_val(int32_t val) { + Instr->setOperand(3, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction Returns the geometry index committed on hit +struct DxilInst_HitObject_GeometryIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_GeometryIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_GeometryIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } 
+ // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance index committed on hit +struct DxilInst_HitObject_InstanceIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceIndex(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the instance id committed on hit +struct DxilInst_HitObject_InstanceID { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_InstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_InstanceID); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the primitive 
index committed on hit +struct DxilInst_HitObject_PrimitiveIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_PrimitiveIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_PrimitiveIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the HitKind of the hit +struct DxilInst_HitObject_HitKind { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_HitKind(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::HitObject_HitKind); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns the shader table index set for this HitObject +struct DxilInst_HitObject_ShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_ShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return 
hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_ShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Returns a HitObject with updated shader table index +struct DxilInst_HitObject_SetShaderTableIndex { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_SetShaderTableIndex(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_SetShaderTableIndex); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_shaderTableIndex = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_shaderTableIndex() const { return Instr->getOperand(2); } + void set_shaderTableIndex(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the root table constant for this HitObject and +/// offset +struct DxilInst_HitObject_LoadLocalRootTableConstant { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_LoadLocalRootTableConstant(llvm::Instruction *pInstr) + : Instr(pInstr) {} + operator bool() const { + 
return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_LoadLocalRootTableConstant); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_offset = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_offset() const { return Instr->getOperand(2); } + void set_offset(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns the attributes set for this HitObject +struct DxilInst_HitObject_Attributes { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_HitObject_Attributes(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::HitObject_Attributes); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_hitObject = 1, + arg_attributes = 2, + }; + // Accessors + llvm::Value *get_hitObject() const { return Instr->getOperand(1); } + void set_hitObject(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_attributes() const { return Instr->getOperand(2); } + void set_attributes(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction reads from a raw buffer and structured buffer +struct DxilInst_RawBufferVectorLoad { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorLoad(llvm::Instruction 
*pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorLoad); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (5 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_buf = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_alignment = 4, + }; + // Accessors + llvm::Value *get_buf() const { return Instr->getOperand(1); } + void set_buf(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(4); } + void set_alignment(llvm::Value *val) { Instr->setOperand(4, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(4)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(4, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer +struct DxilInst_RawBufferVectorStore { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorStore(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorStore); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + 
bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_uav = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_value0 = 4, + arg_alignment = 5, + }; + // Accessors + llvm::Value *get_uav() const { return Instr->getOperand(1); } + void set_uav(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_value0() const { return Instr->getOperand(4); } + void set_value0(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(5); } + void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h index fa13f6d766..e17db016d8 100644 --- a/include/dxc/DXIL/DxilMetadataHelper.h +++ b/include/dxc/DXIL/DxilMetadataHelper.h @@ -233,6 +233,7 @@ class DxilMDHelper { static const unsigned kDxilStructuredBufferElementStrideTag = 1; static const unsigned kDxilSamplerFeedbackKindTag = 2; static const unsigned kDxilAtomic64UseTag = 3; + static const unsigned kDxilReorderCoherentTag = 4; // Type system. static const char kDxilTypeSystemMDName[]; @@ -427,6 +428,8 @@ class DxilMDHelper { // Dxil version. 
void EmitDxilVersion(unsigned Major, unsigned Minor); void LoadDxilVersion(unsigned &Major, unsigned &Minor); + static bool LoadDxilVersion(const llvm::Module *pModule, unsigned &Major, + unsigned &Minor); // Validator version. void EmitValidatorVersion(unsigned Major, unsigned Minor); diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 3514701327..c8b6762b3f 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -57,13 +57,33 @@ class OP { // caches. void RefreshCache(); + // The single llvm::Type * "OverloadType" has one of these forms: + // No overloads (NumOverloadDims == 0): + // - TS_Void: VoidTy + // For single overload dimension (NumOverloadDims == 1): + // - TS_F*, TS_I*: a scalar numeric type (half, float, i1, i64, etc.), + // - TS_UDT: a pointer to a StructType representing a User Defined Type, + // - TS_Object: a named StructType representing a built-in object, or + // - TS_Vector: a vector type (<4 x float>, <16 x i16>, etc.) + // For multiple overload dimensions (TS_Extended, NumOverloadDims > 1): + // - an unnamed StructType containing each type for the corresponding + // dimension, such as: type { i32, <2 x float> } + // - contained type options are the same as for single dimension. 
+ llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType); + + // N-dimension convenience version of GetOpFunc: + llvm::Function *GetOpFunc(OpCode OpCode, + llvm::ArrayRef OverloadTypes); + const llvm::SmallMapVector & GetOpFuncList(OpCode OpCode) const; bool IsDxilOpUsed(OpCode opcode) const; void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } + llvm::Module *GetModule() { return m_pModule; } llvm::Type *GetHandleType() const; + llvm::Type *GetHitObjectType() const; llvm::Type *GetNodeHandleType() const; llvm::Type *GetNodeRecordHandleType() const; llvm::Type *GetResourcePropertiesType() const; @@ -80,9 +100,14 @@ class OP { llvm::Type *GetResRetType(llvm::Type *pOverloadType); llvm::Type *GetCBufferRetType(llvm::Type *pOverloadType); - llvm::Type *GetVectorType(unsigned numElements, llvm::Type *pOverloadType); + llvm::Type *GetStructVectorType(unsigned numElements, + llvm::Type *pOverloadType); bool IsResRetType(llvm::Type *Ty); + // Construct an unnamed struct type containing the set of member types. + llvm::StructType * + GetExtendedOverloadType(llvm::ArrayRef OverloadTypes); + // Try to get the opcode class for a function. // Return true and set `opClass` if the given function is a dxil function. // Return false if the given function is not a dxil function. 
@@ -126,12 +151,8 @@ class OP { static bool IsDxilOpBarrier(OpCode C); static bool BarrierRequiresGroup(const llvm::CallInst *CI); static bool BarrierRequiresNode(const llvm::CallInst *CI); + static bool BarrierRequiresReorder(const llvm::CallInst *CI); static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI); - static bool IsDxilOpTypeName(llvm::StringRef name); - static bool IsDxilOpType(llvm::StructType *ST); - static bool IsDupDxilOpType(llvm::StructType *ST); - static llvm::StructType *GetOriginalDxilOpType(llvm::StructType *ST, - llvm::Module &M); static void GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, unsigned &major, unsigned &minor, unsigned &mask); @@ -140,12 +161,20 @@ class OP { unsigned valMinor, unsigned &major, unsigned &minor, unsigned &mask); + static bool IsDxilOpExtendedOverload(OpCode C); + + // Return true if the overload name suffix for this operation may be + // constructed based on a user-defined or user-influenced type name + // that may not represent the same type in different linked modules. + static bool MayHaveNonCanonicalOverload(OpCode OC); + private: // Per-module properties. 
llvm::LLVMContext &m_Ctx; llvm::Module *m_pModule; llvm::Type *m_pHandleType; + llvm::Type *m_pHitObjectType; llvm::Type *m_pNodeHandleType; llvm::Type *m_pNodeRecordHandleType; llvm::Type *m_pResourcePropertiesType; @@ -162,13 +191,33 @@ class OP { DXIL::LowPrecisionMode m_LowPrecisionMode; - static const unsigned kUserDefineTypeSlot = 9; - static const unsigned kObjectTypeSlot = 10; - static const unsigned kNumTypeOverloads = - 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj + // Overload types are split into "basic" overload types and special types + // Basic: void, half, float, double, i1, i8, i16, i32, i64 + // - These have one canonical overload per TypeSlot + // Special: udt, obj, vec, extended + // - These may have many overloads per type slot + enum TypeSlot : unsigned { + TS_F16 = 0, + TS_F32 = 1, + TS_F64 = 2, + TS_I1 = 3, + TS_I8 = 4, + TS_I16 = 5, + TS_I32 = 6, + TS_I64 = 7, + TS_BasicCount, + TS_UDT = 8, // Ex: %"struct.MyStruct" * + TS_Object = 9, // Ex: %"class.StructuredBuffer" + TS_Vector = 10, // Ex: <8 x i16> + TS_MaskBitCount, // Types used in Mask end here + // TS_Extended is only used to identify the unnamed struct type used to wrap + // multiple overloads when using GetTypeSlot. + TS_Extended, // Ex: type { float, <16 x i32> } + TS_Invalid = UINT_MAX, + }; - llvm::Type *m_pResRetType[kNumTypeOverloads]; - llvm::Type *m_pCBufferRetType[kNumTypeOverloads]; + llvm::Type *m_pResRetType[TS_BasicCount]; + llvm::Type *m_pCBufferRetType[TS_BasicCount]; struct OpCodeCacheItem { llvm::SmallMapVector pOverloads; @@ -179,27 +228,46 @@ class OP { private: // Static properties. + struct OverloadMask { + // mask of type slot bits as (1 << TypeSlot) + uint16_t SlotMask; + static_assert(TS_MaskBitCount <= (sizeof(SlotMask) * 8)); + bool operator[](unsigned TypeSlot) const { + return (TypeSlot < TS_MaskBitCount) ? 
(bool)(SlotMask & (1 << TypeSlot)) + : 0; + } + operator bool() const { return SlotMask != 0; } + }; struct OpCodeProperty { OpCode opCode; const char *pOpCodeName; OpCodeClass opCodeClass; const char *pOpCodeClassName; - bool bAllowOverload[kNumTypeOverloads]; // void, h,f,d, i1, i8,i16,i32,i64, - // udt llvm::Attribute::AttrKind FuncAttr; + + // Number of overload dimensions used by the operation. + unsigned int NumOverloadDims; + + // Mask of supported overload types for each overload dimension. + OverloadMask AllowedOverloads[DXIL::kDxilMaxOloadDims]; + + // Mask of scalar components allowed for each demension where + // AllowedOverloads[n][TS_Vector] is true. + OverloadMask AllowedVectorElements[DXIL::kDxilMaxOloadDims]; }; static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes]; - static const char *m_OverloadTypeName[kNumTypeOverloads]; + static const char *m_OverloadTypeName[TS_BasicCount]; static const char *m_NamePrefix; static const char *m_TypePrefix; static const char *m_MatrixTypePrefix; static unsigned GetTypeSlot(llvm::Type *pType); static const char *GetOverloadTypeName(unsigned TypeSlot); - static llvm::StringRef GetTypeName(llvm::Type *Ty, std::string &str); - static llvm::StringRef ConstructOverloadName(llvm::Type *Ty, - DXIL::OpCode opCode, - std::string &funcNameStorage); + static llvm::StringRef GetTypeName(llvm::Type *Ty, + llvm::SmallVectorImpl &Storage); + static llvm::StringRef + ConstructOverloadName(llvm::Type *Ty, DXIL::OpCode opCode, + llvm::SmallVectorImpl &Storage); }; } // namespace hlsl diff --git a/include/dxc/DXIL/DxilResource.h b/include/dxc/DXIL/DxilResource.h index 49db65caed..dcf70333da 100644 --- a/include/dxc/DXIL/DxilResource.h +++ b/include/dxc/DXIL/DxilResource.h @@ -63,6 +63,8 @@ class DxilResource : public DxilResourceBase { bool IsGloballyCoherent() const; void SetGloballyCoherent(bool b); + bool IsReorderCoherent() const; + void SetReorderCoherent(bool b); bool HasCounter() const; void 
SetHasCounter(bool b); @@ -97,6 +99,7 @@ class DxilResource : public DxilResourceBase { CompType m_CompType; DXIL::SamplerFeedbackType m_SamplerFeedbackType; bool m_bGloballyCoherent; + bool m_bReorderCoherent; bool m_bHasCounter; bool m_bROV; bool m_bHasAtomic64Use; diff --git a/include/dxc/DXIL/DxilResourceProperties.h b/include/dxc/DXIL/DxilResourceProperties.h index 21a705f077..2f4ff58969 100644 --- a/include/dxc/DXIL/DxilResourceProperties.h +++ b/include/dxc/DXIL/DxilResourceProperties.h @@ -47,7 +47,8 @@ struct DxilResourceProperties { uint8_t SamplerCmpOrHasCounter : 1; // BYTE 2 - uint8_t Reserved2; + uint8_t IsReorderCoherent : 1; + uint8_t Reserved2 : 7; // BYTE 3 uint8_t Reserved3; diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index 490f335db5..ca8f2ac755 100644 --- a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -162,6 +162,8 @@ GetHLSLResourceProperties(llvm::Type *Ty); bool IsHLSLResourceType(llvm::Type *Ty); bool IsHLSLObjectType(llvm::Type *Ty); bool IsHLSLRayQueryType(llvm::Type *Ty); +llvm::Type *GetHLSLHitObjectType(llvm::Module *M); +bool IsHLSLHitObjectType(llvm::Type *Ty); bool IsHLSLResourceDescType(llvm::Type *Ty); bool IsResourceSingleComponent(llvm::Type *Ty); uint8_t GetResourceComponentCount(llvm::Type *Ty); @@ -221,6 +223,10 @@ bool DeleteDeadAllocas(llvm::Function &F); llvm::Value *GEPIdxToOffset(llvm::GetElementPtrInst *GEP, llvm::IRBuilder<> &Builder, hlsl::OP *OP, const llvm::DataLayout &DL); + +// Passes back Dxil version of the given module on true return. 
+bool LoadDxilVersion(const llvm::Module *M, unsigned &Major, unsigned &Minor); + } // namespace dxilutil } // namespace hlsl diff --git a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl index 132d272a8e..4b58b406c2 100644 --- a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl +++ b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl @@ -22,6 +22,7 @@ RDAT_ENUM_START(DxilResourceFlag, uint32_t) RDAT_ENUM_VALUE(UAVRasterizerOrderedView, 1 << 2) RDAT_ENUM_VALUE(DynamicIndexing, 1 << 3) RDAT_ENUM_VALUE(Atomics64Use, 1 << 4) + RDAT_ENUM_VALUE(UAVReorderCoherent, 1 << 5) RDAT_ENUM_END() RDAT_ENUM_START(DxilShaderStageFlags, uint32_t) diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h index ad0ddfdfd2..5cc7c4aa50 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h +++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h @@ -27,6 +27,7 @@ ModulePass *createDxilDebugInstrumentationPass(); ModulePass *createDxilShaderAccessTrackingPass(); ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass(); ModulePass *createDxilPIXDXRInvocationsLogPass(); +ModulePass *createDxilNonUniformResourceIndexInstrumentationPass(); void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry &); void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry &); @@ -41,5 +42,7 @@ void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry &); void initializeDxilPIXAddTidToAmplificationShaderPayloadPass( llvm::PassRegistry &); void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry &); +void initializeDxilNonUniformResourceIndexInstrumentationPass( + llvm::PassRegistry &); } // namespace llvm diff --git a/include/dxc/HLSL/DxilGenerationPass.h b/include/dxc/HLSL/DxilGenerationPass.h index c77ddab3d0..9df93e9232 100644 --- a/include/dxc/HLSL/DxilGenerationPass.h +++ b/include/dxc/HLSL/DxilGenerationPass.h @@ -81,6 +81,7 @@ ModulePass *createResumePassesPass(); FunctionPass 
*createMatrixBitcastLowerPass(); ModulePass *createDxilCleanupAddrSpaceCastPass(); ModulePass *createDxilRenameResourcesPass(); +ModulePass *createDxilScalarizeVectorLoadStoresPass(); void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry &); void initializeDxilAllocateResourcesForLibPass(llvm::PassRegistry &); @@ -115,6 +116,7 @@ void initializeResumePassesPass(llvm::PassRegistry &); void initializeMatrixBitcastLowerPassPass(llvm::PassRegistry &); void initializeDxilCleanupAddrSpaceCastPass(llvm::PassRegistry &); void initializeDxilRenameResourcesPass(llvm::PassRegistry &); +void initializeDxilScalarizeVectorLoadStoresPass(llvm::PassRegistry &); ModulePass *createDxilValidateWaveSensitivityPass(); void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry &); diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 1ccb7f04a2..a7db8612a6 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -398,6 +398,10 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3; const unsigned kTraceRayRayDescOpIdx = 7; const unsigned kTraceRayPayLoadOpIdx = 8; +// AllocateRayQuery +const unsigned kAllocateRayQueryRayFlagsIdx = 1; +const unsigned kAllocateRayQueryRayQueryFlagsIdx = 2; + // CallShader. 
const unsigned kCallShaderPayloadOpIdx = 2; @@ -429,6 +433,10 @@ const unsigned kNodeHandleToResCastOpIdx = 1; const unsigned kAnnotateNodeHandleNodePropIdx = 2; const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; +// HitObject::MakeMiss +const unsigned kHitObjectMakeMiss_NumOp = 8; +const unsigned kHitObjectMakeMissRayDescOpIdx = 4; + } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index fcc9bb11b1..d37c27a38e 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -5,378 +5,398 @@ #pragma once namespace hlsl { enum class IntrinsicOp { - IOP_AcceptHitAndEndSearch, - IOP_AddUint64, - IOP_AllMemoryBarrier, - IOP_AllMemoryBarrierWithGroupSync, - IOP_AllocateRayQuery, - IOP_Barrier, - IOP_CallShader, - IOP_CheckAccessFullyMapped, - IOP_CreateResourceFromHeap, - IOP_D3DCOLORtoUBYTE4, - IOP_DeviceMemoryBarrier, - IOP_DeviceMemoryBarrierWithGroupSync, - IOP_DispatchMesh, - IOP_DispatchRaysDimensions, - IOP_DispatchRaysIndex, - IOP_EvaluateAttributeAtSample, - IOP_EvaluateAttributeCentroid, - IOP_EvaluateAttributeSnapped, - IOP_GeometryIndex, - IOP_GetAttributeAtVertex, - IOP_GetRemainingRecursionLevels, - IOP_GetRenderTargetSampleCount, - IOP_GetRenderTargetSamplePosition, - IOP_GroupMemoryBarrier, - IOP_GroupMemoryBarrierWithGroupSync, - IOP_HitKind, - IOP_IgnoreHit, - IOP_InstanceID, - IOP_InstanceIndex, - IOP_InterlockedAdd, - IOP_InterlockedAnd, - IOP_InterlockedCompareExchange, - IOP_InterlockedCompareExchangeFloatBitwise, - IOP_InterlockedCompareStore, - IOP_InterlockedCompareStoreFloatBitwise, - IOP_InterlockedExchange, - IOP_InterlockedMax, - IOP_InterlockedMin, - IOP_InterlockedOr, - IOP_InterlockedXor, - IOP_IsHelperLane, - IOP_NonUniformResourceIndex, - IOP_ObjectRayDirection, - IOP_ObjectRayOrigin, - IOP_ObjectToWorld, - IOP_ObjectToWorld3x4, - IOP_ObjectToWorld4x3, - IOP_PrimitiveIndex, - 
IOP_Process2DQuadTessFactorsAvg, - IOP_Process2DQuadTessFactorsMax, - IOP_Process2DQuadTessFactorsMin, - IOP_ProcessIsolineTessFactors, - IOP_ProcessQuadTessFactorsAvg, - IOP_ProcessQuadTessFactorsMax, - IOP_ProcessQuadTessFactorsMin, - IOP_ProcessTriTessFactorsAvg, - IOP_ProcessTriTessFactorsMax, - IOP_ProcessTriTessFactorsMin, - IOP_QuadAll, - IOP_QuadAny, - IOP_QuadReadAcrossDiagonal, - IOP_QuadReadAcrossX, - IOP_QuadReadAcrossY, - IOP_QuadReadLaneAt, - IOP_RayFlags, - IOP_RayTCurrent, - IOP_RayTMin, - IOP_ReportHit, - IOP_SetMeshOutputCounts, - IOP_TraceRay, - IOP_WaveActiveAllEqual, - IOP_WaveActiveAllTrue, - IOP_WaveActiveAnyTrue, - IOP_WaveActiveBallot, - IOP_WaveActiveBitAnd, - IOP_WaveActiveBitOr, - IOP_WaveActiveBitXor, - IOP_WaveActiveCountBits, - IOP_WaveActiveMax, - IOP_WaveActiveMin, - IOP_WaveActiveProduct, - IOP_WaveActiveSum, - IOP_WaveGetLaneCount, - IOP_WaveGetLaneIndex, - IOP_WaveIsFirstLane, - IOP_WaveMatch, - IOP_WaveMultiPrefixBitAnd, - IOP_WaveMultiPrefixBitOr, - IOP_WaveMultiPrefixBitXor, - IOP_WaveMultiPrefixCountBits, - IOP_WaveMultiPrefixProduct, - IOP_WaveMultiPrefixSum, - IOP_WavePrefixCountBits, - IOP_WavePrefixProduct, - IOP_WavePrefixSum, - IOP_WaveReadLaneAt, - IOP_WaveReadLaneFirst, - IOP_WorldRayDirection, - IOP_WorldRayOrigin, - IOP_WorldToObject, - IOP_WorldToObject3x4, - IOP_WorldToObject4x3, - IOP_abort, - IOP_abs, - IOP_acos, - IOP_all, - IOP_and, - IOP_any, - IOP_asdouble, - IOP_asfloat, - IOP_asfloat16, - IOP_asin, - IOP_asint, - IOP_asint16, - IOP_asuint, - IOP_asuint16, - IOP_atan, - IOP_atan2, - IOP_ceil, - IOP_clamp, - IOP_clip, - IOP_cos, - IOP_cosh, - IOP_countbits, - IOP_cross, - IOP_ddx, - IOP_ddx_coarse, - IOP_ddx_fine, - IOP_ddy, - IOP_ddy_coarse, - IOP_ddy_fine, - IOP_degrees, - IOP_determinant, - IOP_distance, - IOP_dot, - IOP_dot2add, - IOP_dot4add_i8packed, - IOP_dot4add_u8packed, - IOP_dst, - IOP_exp, - IOP_exp2, - IOP_f16tof32, - IOP_f32tof16, - IOP_faceforward, - IOP_firstbithigh, - IOP_firstbitlow, - 
IOP_floor, - IOP_fma, - IOP_fmod, - IOP_frac, - IOP_frexp, - IOP_fwidth, - IOP_isfinite, - IOP_isinf, - IOP_isnan, - IOP_ldexp, - IOP_length, - IOP_lerp, - IOP_lit, - IOP_log, - IOP_log10, - IOP_log2, - IOP_mad, - IOP_max, - IOP_min, - IOP_modf, - IOP_msad4, - IOP_mul, - IOP_normalize, - IOP_or, - IOP_pack_clamp_s8, - IOP_pack_clamp_u8, - IOP_pack_s8, - IOP_pack_u8, - IOP_pow, - IOP_printf, - IOP_radians, - IOP_rcp, - IOP_reflect, - IOP_refract, - IOP_reversebits, - IOP_round, - IOP_rsqrt, - IOP_saturate, - IOP_select, - IOP_sign, - IOP_sin, - IOP_sincos, - IOP_sinh, - IOP_smoothstep, - IOP_source_mark, - IOP_sqrt, - IOP_step, - IOP_tan, - IOP_tanh, - IOP_tex1D, - IOP_tex1Dbias, - IOP_tex1Dgrad, - IOP_tex1Dlod, - IOP_tex1Dproj, - IOP_tex2D, - IOP_tex2Dbias, - IOP_tex2Dgrad, - IOP_tex2Dlod, - IOP_tex2Dproj, - IOP_tex3D, - IOP_tex3Dbias, - IOP_tex3Dgrad, - IOP_tex3Dlod, - IOP_tex3Dproj, - IOP_texCUBE, - IOP_texCUBEbias, - IOP_texCUBEgrad, - IOP_texCUBElod, - IOP_texCUBEproj, - IOP_transpose, - IOP_trunc, - IOP_unpack_s8s16, - IOP_unpack_s8s32, - IOP_unpack_u8u16, - IOP_unpack_u8u32, -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferLoad, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferStore, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkReadClock, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode_id, -#endif // ENABLE_SPIRV_CODEGEN - MOP_Append, - MOP_RestartStrip, - MOP_CalculateLevelOfDetail, - MOP_CalculateLevelOfDetailUnclamped, - MOP_GetDimensions, - MOP_Load, - MOP_Sample, - MOP_SampleBias, - MOP_SampleCmp, - MOP_SampleCmpBias, - MOP_SampleCmpGrad, - MOP_SampleCmpLevel, - MOP_SampleCmpLevelZero, - MOP_SampleGrad, - MOP_SampleLevel, - MOP_Gather, - MOP_GatherAlpha, - MOP_GatherBlue, - MOP_GatherCmp, - MOP_GatherCmpAlpha, - MOP_GatherCmpBlue, - MOP_GatherCmpGreen, - MOP_GatherCmpRed, - 
MOP_GatherGreen, - MOP_GatherRaw, - MOP_GatherRed, - MOP_GetSamplePosition, - MOP_Load2, - MOP_Load3, - MOP_Load4, - MOP_InterlockedAdd, - MOP_InterlockedAdd64, - MOP_InterlockedAnd, - MOP_InterlockedAnd64, - MOP_InterlockedCompareExchange, - MOP_InterlockedCompareExchange64, - MOP_InterlockedCompareExchangeFloatBitwise, - MOP_InterlockedCompareStore, - MOP_InterlockedCompareStore64, - MOP_InterlockedCompareStoreFloatBitwise, - MOP_InterlockedExchange, - MOP_InterlockedExchange64, - MOP_InterlockedExchangeFloat, - MOP_InterlockedMax, - MOP_InterlockedMax64, - MOP_InterlockedMin, - MOP_InterlockedMin64, - MOP_InterlockedOr, - MOP_InterlockedOr64, - MOP_InterlockedXor, - MOP_InterlockedXor64, - MOP_Store, - MOP_Store2, - MOP_Store3, - MOP_Store4, - MOP_DecrementCounter, - MOP_IncrementCounter, - MOP_Consume, - MOP_WriteSamplerFeedback, - MOP_WriteSamplerFeedbackBias, - MOP_WriteSamplerFeedbackGrad, - MOP_WriteSamplerFeedbackLevel, - MOP_Abort, - MOP_CandidateGeometryIndex, - MOP_CandidateInstanceContributionToHitGroupIndex, - MOP_CandidateInstanceID, - MOP_CandidateInstanceIndex, - MOP_CandidateObjectRayDirection, - MOP_CandidateObjectRayOrigin, - MOP_CandidateObjectToWorld3x4, - MOP_CandidateObjectToWorld4x3, - MOP_CandidatePrimitiveIndex, - MOP_CandidateProceduralPrimitiveNonOpaque, - MOP_CandidateTriangleBarycentrics, - MOP_CandidateTriangleFrontFace, - MOP_CandidateTriangleRayT, - MOP_CandidateType, - MOP_CandidateWorldToObject3x4, - MOP_CandidateWorldToObject4x3, - MOP_CommitNonOpaqueTriangleHit, - MOP_CommitProceduralPrimitiveHit, - MOP_CommittedGeometryIndex, - MOP_CommittedInstanceContributionToHitGroupIndex, - MOP_CommittedInstanceID, - MOP_CommittedInstanceIndex, - MOP_CommittedObjectRayDirection, - MOP_CommittedObjectRayOrigin, - MOP_CommittedObjectToWorld3x4, - MOP_CommittedObjectToWorld4x3, - MOP_CommittedPrimitiveIndex, - MOP_CommittedRayT, - MOP_CommittedStatus, - MOP_CommittedTriangleBarycentrics, - MOP_CommittedTriangleFrontFace, - 
MOP_CommittedWorldToObject3x4, - MOP_CommittedWorldToObject4x3, - MOP_Proceed, - MOP_RayFlags, - MOP_RayTMin, - MOP_TraceRayInline, - MOP_WorldRayDirection, - MOP_WorldRayOrigin, - MOP_Count, - MOP_FinishedCrossGroupSharing, - MOP_GetGroupNodeOutputRecords, - MOP_GetThreadNodeOutputRecords, - MOP_IsValid, - MOP_GroupIncrementOutputCount, - MOP_ThreadIncrementOutputCount, - MOP_OutputComplete, -#ifdef ENABLE_SPIRV_CODEGEN - MOP_SubpassLoad, -#endif // ENABLE_SPIRV_CODEGEN + IOP_AcceptHitAndEndSearch = 0, + IOP_AddUint64 = 1, + IOP_AllMemoryBarrier = 2, + IOP_AllMemoryBarrierWithGroupSync = 3, + IOP_AllocateRayQuery = 4, + IOP_Barrier = 5, + IOP_CallShader = 6, + IOP_CheckAccessFullyMapped = 7, + IOP_CreateResourceFromHeap = 8, + IOP_D3DCOLORtoUBYTE4 = 9, + IOP_DeviceMemoryBarrier = 10, + IOP_DeviceMemoryBarrierWithGroupSync = 11, + IOP_DispatchMesh = 12, + IOP_DispatchRaysDimensions = 13, + IOP_DispatchRaysIndex = 14, + IOP_EvaluateAttributeAtSample = 15, + IOP_EvaluateAttributeCentroid = 16, + IOP_EvaluateAttributeSnapped = 17, + IOP_GeometryIndex = 18, + IOP_GetAttributeAtVertex = 19, + IOP_GetRemainingRecursionLevels = 20, + IOP_GetRenderTargetSampleCount = 21, + IOP_GetRenderTargetSamplePosition = 22, + IOP_GroupMemoryBarrier = 23, + IOP_GroupMemoryBarrierWithGroupSync = 24, + IOP_HitKind = 25, + IOP_IgnoreHit = 26, + IOP_InstanceID = 27, + IOP_InstanceIndex = 28, + IOP_InterlockedAdd = 29, + IOP_InterlockedAnd = 30, + IOP_InterlockedCompareExchange = 31, + IOP_InterlockedCompareExchangeFloatBitwise = 32, + IOP_InterlockedCompareStore = 33, + IOP_InterlockedCompareStoreFloatBitwise = 34, + IOP_InterlockedExchange = 35, + IOP_InterlockedMax = 36, + IOP_InterlockedMin = 37, + IOP_InterlockedOr = 38, + IOP_InterlockedXor = 39, + IOP_IsHelperLane = 40, + IOP_NonUniformResourceIndex = 41, + IOP_ObjectRayDirection = 42, + IOP_ObjectRayOrigin = 43, + IOP_ObjectToWorld = 44, + IOP_ObjectToWorld3x4 = 45, + IOP_ObjectToWorld4x3 = 46, + IOP_PrimitiveIndex = 47, + 
IOP_Process2DQuadTessFactorsAvg = 48, + IOP_Process2DQuadTessFactorsMax = 49, + IOP_Process2DQuadTessFactorsMin = 50, + IOP_ProcessIsolineTessFactors = 51, + IOP_ProcessQuadTessFactorsAvg = 52, + IOP_ProcessQuadTessFactorsMax = 53, + IOP_ProcessQuadTessFactorsMin = 54, + IOP_ProcessTriTessFactorsAvg = 55, + IOP_ProcessTriTessFactorsMax = 56, + IOP_ProcessTriTessFactorsMin = 57, + IOP_QuadAll = 58, + IOP_QuadAny = 59, + IOP_QuadReadAcrossDiagonal = 60, + IOP_QuadReadAcrossX = 61, + IOP_QuadReadAcrossY = 62, + IOP_QuadReadLaneAt = 63, + IOP_RayFlags = 64, + IOP_RayTCurrent = 65, + IOP_RayTMin = 66, + IOP_ReportHit = 67, + IOP_SetMeshOutputCounts = 68, + IOP_TraceRay = 69, + IOP_WaveActiveAllEqual = 70, + IOP_WaveActiveAllTrue = 71, + IOP_WaveActiveAnyTrue = 72, + IOP_WaveActiveBallot = 73, + IOP_WaveActiveBitAnd = 74, + IOP_WaveActiveBitOr = 75, + IOP_WaveActiveBitXor = 76, + IOP_WaveActiveCountBits = 77, + IOP_WaveActiveMax = 78, + IOP_WaveActiveMin = 79, + IOP_WaveActiveProduct = 80, + IOP_WaveActiveSum = 81, + IOP_WaveGetLaneCount = 82, + IOP_WaveGetLaneIndex = 83, + IOP_WaveIsFirstLane = 84, + IOP_WaveMatch = 85, + IOP_WaveMultiPrefixBitAnd = 86, + IOP_WaveMultiPrefixBitOr = 87, + IOP_WaveMultiPrefixBitXor = 88, + IOP_WaveMultiPrefixCountBits = 89, + IOP_WaveMultiPrefixProduct = 90, + IOP_WaveMultiPrefixSum = 91, + IOP_WavePrefixCountBits = 92, + IOP_WavePrefixProduct = 93, + IOP_WavePrefixSum = 94, + IOP_WaveReadLaneAt = 95, + IOP_WaveReadLaneFirst = 96, + IOP_WorldRayDirection = 97, + IOP_WorldRayOrigin = 98, + IOP_WorldToObject = 99, + IOP_WorldToObject3x4 = 100, + IOP_WorldToObject4x3 = 101, + IOP_abort = 102, + IOP_abs = 103, + IOP_acos = 104, + IOP_all = 105, + IOP_and = 106, + IOP_any = 107, + IOP_asdouble = 108, + IOP_asfloat = 109, + IOP_asfloat16 = 110, + IOP_asin = 111, + IOP_asint = 112, + IOP_asint16 = 113, + IOP_asuint = 114, + IOP_asuint16 = 115, + IOP_atan = 116, + IOP_atan2 = 117, + IOP_ceil = 118, + IOP_clamp = 119, + IOP_clip = 120, + IOP_cos = 
121, + IOP_cosh = 122, + IOP_countbits = 123, + IOP_cross = 124, + IOP_ddx = 125, + IOP_ddx_coarse = 126, + IOP_ddx_fine = 127, + IOP_ddy = 128, + IOP_ddy_coarse = 129, + IOP_ddy_fine = 130, + IOP_degrees = 131, + IOP_determinant = 132, + IOP_distance = 133, + IOP_dot = 134, + IOP_dot2add = 135, + IOP_dot4add_i8packed = 136, + IOP_dot4add_u8packed = 137, + IOP_dst = 138, + IOP_exp = 139, + IOP_exp2 = 140, + IOP_f16tof32 = 141, + IOP_f32tof16 = 142, + IOP_faceforward = 143, + IOP_firstbithigh = 144, + IOP_firstbitlow = 145, + IOP_floor = 146, + IOP_fma = 147, + IOP_fmod = 148, + IOP_frac = 149, + IOP_frexp = 150, + IOP_fwidth = 151, + IOP_isfinite = 152, + IOP_isinf = 153, + IOP_isnan = 154, + IOP_ldexp = 155, + IOP_length = 156, + IOP_lerp = 157, + IOP_lit = 158, + IOP_log = 159, + IOP_log10 = 160, + IOP_log2 = 161, + IOP_mad = 162, + IOP_max = 163, + IOP_min = 164, + IOP_modf = 165, + IOP_msad4 = 166, + IOP_mul = 167, + IOP_normalize = 168, + IOP_or = 169, + IOP_pack_clamp_s8 = 170, + IOP_pack_clamp_u8 = 171, + IOP_pack_s8 = 172, + IOP_pack_u8 = 173, + IOP_pow = 174, + IOP_printf = 175, + IOP_radians = 176, + IOP_rcp = 177, + IOP_reflect = 178, + IOP_refract = 179, + IOP_reversebits = 180, + IOP_round = 181, + IOP_rsqrt = 182, + IOP_saturate = 183, + IOP_select = 184, + IOP_sign = 185, + IOP_sin = 186, + IOP_sincos = 187, + IOP_sinh = 188, + IOP_smoothstep = 189, + IOP_source_mark = 190, + IOP_sqrt = 191, + IOP_step = 192, + IOP_tan = 193, + IOP_tanh = 194, + IOP_tex1D = 195, + IOP_tex1Dbias = 196, + IOP_tex1Dgrad = 197, + IOP_tex1Dlod = 198, + IOP_tex1Dproj = 199, + IOP_tex2D = 200, + IOP_tex2Dbias = 201, + IOP_tex2Dgrad = 202, + IOP_tex2Dlod = 203, + IOP_tex2Dproj = 204, + IOP_tex3D = 205, + IOP_tex3Dbias = 206, + IOP_tex3Dgrad = 207, + IOP_tex3Dlod = 208, + IOP_tex3Dproj = 209, + IOP_texCUBE = 210, + IOP_texCUBEbias = 211, + IOP_texCUBEgrad = 212, + IOP_texCUBElod = 213, + IOP_texCUBEproj = 214, + IOP_transpose = 215, + IOP_trunc = 216, + IOP_unpack_s8s16 = 
217, + IOP_unpack_s8s32 = 218, + IOP_unpack_u8u16 = 219, + IOP_unpack_u8u32 = 220, + IOP_VkRawBufferLoad = 221, + IOP_VkRawBufferStore = 222, + IOP_VkReadClock = 223, + IOP_Vkext_execution_mode = 224, + IOP_Vkext_execution_mode_id = 225, + IOP_Vkreinterpret_pointer_cast = 360, + IOP_Vkstatic_pointer_cast = 361, + MOP_GetBufferContents = 362, + MOP_Append = 226, + MOP_RestartStrip = 227, + MOP_CalculateLevelOfDetail = 228, + MOP_CalculateLevelOfDetailUnclamped = 229, + MOP_GetDimensions = 230, + MOP_Load = 231, + MOP_Sample = 232, + MOP_SampleBias = 233, + MOP_SampleCmp = 234, + MOP_SampleCmpBias = 235, + MOP_SampleCmpGrad = 236, + MOP_SampleCmpLevel = 237, + MOP_SampleCmpLevelZero = 238, + MOP_SampleGrad = 239, + MOP_SampleLevel = 240, + MOP_Gather = 241, + MOP_GatherAlpha = 242, + MOP_GatherBlue = 243, + MOP_GatherCmp = 244, + MOP_GatherCmpAlpha = 245, + MOP_GatherCmpBlue = 246, + MOP_GatherCmpGreen = 247, + MOP_GatherCmpRed = 248, + MOP_GatherGreen = 249, + MOP_GatherRaw = 250, + MOP_GatherRed = 251, + MOP_GetSamplePosition = 252, + MOP_Load2 = 253, + MOP_Load3 = 254, + MOP_Load4 = 255, + MOP_InterlockedAdd = 256, + MOP_InterlockedAdd64 = 257, + MOP_InterlockedAnd = 258, + MOP_InterlockedAnd64 = 259, + MOP_InterlockedCompareExchange = 260, + MOP_InterlockedCompareExchange64 = 261, + MOP_InterlockedCompareExchangeFloatBitwise = 262, + MOP_InterlockedCompareStore = 263, + MOP_InterlockedCompareStore64 = 264, + MOP_InterlockedCompareStoreFloatBitwise = 265, + MOP_InterlockedExchange = 266, + MOP_InterlockedExchange64 = 267, + MOP_InterlockedExchangeFloat = 268, + MOP_InterlockedMax = 269, + MOP_InterlockedMax64 = 270, + MOP_InterlockedMin = 271, + MOP_InterlockedMin64 = 272, + MOP_InterlockedOr = 273, + MOP_InterlockedOr64 = 274, + MOP_InterlockedXor = 275, + MOP_InterlockedXor64 = 276, + MOP_Store = 277, + MOP_Store2 = 278, + MOP_Store3 = 279, + MOP_Store4 = 280, + MOP_DecrementCounter = 281, + MOP_IncrementCounter = 282, + MOP_Consume = 283, + 
MOP_WriteSamplerFeedback = 284, + MOP_WriteSamplerFeedbackBias = 285, + MOP_WriteSamplerFeedbackGrad = 286, + MOP_WriteSamplerFeedbackLevel = 287, + MOP_Abort = 288, + MOP_CandidateGeometryIndex = 289, + MOP_CandidateInstanceContributionToHitGroupIndex = 290, + MOP_CandidateInstanceID = 291, + MOP_CandidateInstanceIndex = 292, + MOP_CandidateObjectRayDirection = 293, + MOP_CandidateObjectRayOrigin = 294, + MOP_CandidateObjectToWorld3x4 = 295, + MOP_CandidateObjectToWorld4x3 = 296, + MOP_CandidatePrimitiveIndex = 297, + MOP_CandidateProceduralPrimitiveNonOpaque = 298, + MOP_CandidateTriangleBarycentrics = 299, + MOP_CandidateTriangleFrontFace = 300, + MOP_CandidateTriangleRayT = 301, + MOP_CandidateType = 302, + MOP_CandidateWorldToObject3x4 = 303, + MOP_CandidateWorldToObject4x3 = 304, + MOP_CommitNonOpaqueTriangleHit = 305, + MOP_CommitProceduralPrimitiveHit = 306, + MOP_CommittedGeometryIndex = 307, + MOP_CommittedInstanceContributionToHitGroupIndex = 308, + MOP_CommittedInstanceID = 309, + MOP_CommittedInstanceIndex = 310, + MOP_CommittedObjectRayDirection = 311, + MOP_CommittedObjectRayOrigin = 312, + MOP_CommittedObjectToWorld3x4 = 313, + MOP_CommittedObjectToWorld4x3 = 314, + MOP_CommittedPrimitiveIndex = 315, + MOP_CommittedRayT = 316, + MOP_CommittedStatus = 317, + MOP_CommittedTriangleBarycentrics = 318, + MOP_CommittedTriangleFrontFace = 319, + MOP_CommittedWorldToObject3x4 = 320, + MOP_CommittedWorldToObject4x3 = 321, + MOP_Proceed = 322, + MOP_RayFlags = 323, + MOP_RayTMin = 324, + MOP_TraceRayInline = 325, + MOP_WorldRayDirection = 326, + MOP_WorldRayOrigin = 327, + MOP_DxHitObject_FromRayQuery = 363, + MOP_DxHitObject_GetAttributes = 364, + MOP_DxHitObject_GetGeometryIndex = 365, + MOP_DxHitObject_GetHitKind = 366, + MOP_DxHitObject_GetInstanceID = 367, + MOP_DxHitObject_GetInstanceIndex = 368, + MOP_DxHitObject_GetObjectRayDirection = 369, + MOP_DxHitObject_GetObjectRayOrigin = 370, + MOP_DxHitObject_GetObjectToWorld3x4 = 371, + 
MOP_DxHitObject_GetObjectToWorld4x3 = 372, + MOP_DxHitObject_GetPrimitiveIndex = 373, + MOP_DxHitObject_GetRayFlags = 374, + MOP_DxHitObject_GetRayTCurrent = 375, + MOP_DxHitObject_GetRayTMin = 376, + MOP_DxHitObject_GetShaderTableIndex = 377, + MOP_DxHitObject_GetWorldRayDirection = 378, + MOP_DxHitObject_GetWorldRayOrigin = 379, + MOP_DxHitObject_GetWorldToObject3x4 = 380, + MOP_DxHitObject_GetWorldToObject4x3 = 381, + MOP_DxHitObject_Invoke = 382, + MOP_DxHitObject_IsHit = 383, + MOP_DxHitObject_IsMiss = 384, + MOP_DxHitObject_IsNop = 385, + MOP_DxHitObject_LoadLocalRootTableConstant = 386, + MOP_DxHitObject_MakeMiss = 387, + MOP_DxHitObject_MakeNop = 358, + MOP_DxHitObject_SetShaderTableIndex = 388, + MOP_DxHitObject_TraceRay = 389, + IOP_DxMaybeReorderThread = 359, + MOP_Count = 328, + MOP_FinishedCrossGroupSharing = 329, + MOP_GetGroupNodeOutputRecords = 330, + MOP_GetThreadNodeOutputRecords = 331, + MOP_IsValid = 332, + MOP_GroupIncrementOutputCount = 333, + MOP_ThreadIncrementOutputCount = 334, + MOP_OutputComplete = 335, + MOP_SubpassLoad = 336, // unsigned - IOP_InterlockedUMax, - IOP_InterlockedUMin, - IOP_WaveActiveUMax, - IOP_WaveActiveUMin, - IOP_WaveActiveUProduct, - IOP_WaveActiveUSum, - IOP_WaveMultiPrefixUProduct, - IOP_WaveMultiPrefixUSum, - IOP_WavePrefixUProduct, - IOP_WavePrefixUSum, - IOP_uabs, - IOP_uclamp, - IOP_udot, - IOP_ufirstbithigh, - IOP_umad, - IOP_umax, - IOP_umin, - IOP_umul, - IOP_usign, - MOP_InterlockedUMax, - MOP_InterlockedUMin, - Num_Intrinsics, + IOP_InterlockedUMax = 337, + IOP_InterlockedUMin = 338, + IOP_WaveActiveUMax = 339, + IOP_WaveActiveUMin = 340, + IOP_WaveActiveUProduct = 341, + IOP_WaveActiveUSum = 342, + IOP_WaveMultiPrefixUProduct = 343, + IOP_WaveMultiPrefixUSum = 344, + IOP_WavePrefixUProduct = 345, + IOP_WavePrefixUSum = 346, + IOP_uabs = 347, + IOP_uclamp = 348, + IOP_udot = 349, + IOP_ufirstbithigh = 350, + IOP_umad = 351, + IOP_umax = 352, + IOP_umin = 353, + IOP_umul = 354, + IOP_usign = 355, + 
MOP_InterlockedUMax = 356, + MOP_InterlockedUMin = 357, + Num_Intrinsics = 390, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index 887591ae82..56e95a1659 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -274,6 +274,8 @@ class DxcOpts { SpirvOptions; // All SPIR-V CodeGen-related options #endif // SPIRV Change Ends + + bool GenMetal = false; // OPT_metal }; /// Use this class to capture, convert and handle the lifetime for the diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 130e19a525..ea000f4877 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -346,6 +346,11 @@ def disable_exception_handling : Flag<["-", "/"], "disable-exception-handling">, def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Return a module interface instead of serialized output">; +def metal : Flag<["-"], "metal">, + Group, + Flags<[CoreOption, DriverOption]>, + HelpText<"Generate Metal code">; + // SPIRV Change Starts def spirv : Flag<["-"], "spirv">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Generate SPIR-V code">; diff --git a/include/dxc/WinAdapter.h b/include/dxc/WinAdapter.h index b8c6646871..d02ad1ac38 100644 --- a/include/dxc/WinAdapter.h +++ b/include/dxc/WinAdapter.h @@ -51,7 +51,8 @@ #define _countof(a) (sizeof(a) / sizeof(*(a))) // If it is GCC, there is no UUID support and we must emulate it. -#ifndef __clang__ +// Clang support depends on the -fms-extensions compiler flag. 
+#if !defined(__clang__) || !defined(_MSC_EXTENSIONS) #define __EMULATE_UUID 1 #endif // __clang__ diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index b0f9a467a4..28bd3e7066 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,6 +7,9 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ @@ -35,6 +38,7 @@ typedef struct ID3D10Blob ID3D10Blob; static const BYTE INTRIN_TEMPLATE_FROM_TYPE = 0xff; static const BYTE INTRIN_TEMPLATE_VARARGS = 0xfe; static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION = 0xfd; +static const BYTE INTRIN_TEMPLATE_FROM_FUNCTION_2 = 0xfc; // Use this enumeration to describe allowed templates (layouts) in intrinsics. enum LEGAL_INTRINSIC_TEMPLATES { @@ -126,7 +130,15 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS = 49, LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS = 50, - LICOMPTYPE_COUNT = 51 + LICOMPTYPE_HIT_OBJECT = 51, + LICOMPTYPE_RAY_QUERY = 52, + +#ifdef ENABLE_SPIRV_CODEGEN + LICOMPTYPE_VK_BUFFER_POINTER = 53, + LICOMPTYPE_COUNT = 54 +#else + LICOMPTYPE_COUNT = 53 +#endif }; static const BYTE IA_SPECIAL_BASE = 0xf0; @@ -160,11 +172,17 @@ struct HLSL_INTRINSIC_ARGUMENT { // matching input constraints. 
}; +// HLSL_INTRINSIC flags +static const UINT INTRIN_FLAG_READ_ONLY = 1U << 0; +static const UINT INTRIN_FLAG_READ_NONE = 1U << 1; +static const UINT INTRIN_FLAG_IS_WAVE = 1U << 2; +static const UINT INTRIN_FLAG_STATIC_MEMBER = 1U << 3; + struct HLSL_INTRINSIC { UINT Op; // Intrinsic Op ID - BOOL bReadOnly; // Only read memory - BOOL bReadNone; // Not read memory - BOOL bIsWave; // Is a wave-sensitive op + UINT Flags; // INTRIN_FLAG_* flags + UINT MinShaderModel; // Encoded minimum shader model, 0 = no minimum + // (Major << 4) + (Minor & 0xf) INT iOverloadParamIndex; // Parameter decide the overload type, -1 means ret // type UINT uNumArgs; // Count of arguments in pArgs. diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp index fdd6d6b946..c1282a980a 100644 --- a/lib/DXIL/DxilMetadataHelper.cpp +++ b/lib/DXIL/DxilMetadataHelper.cpp @@ -177,17 +177,28 @@ void DxilMDHelper::EmitDxilVersion(unsigned Major, unsigned Minor) { pDxilVersionMD->addOperand(MDNode::get(m_Ctx, MDVals)); } -void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { - NamedMDNode *pDxilVersionMD = m_pModule->getNamedMetadata(kDxilVersionMDName); - IFTBOOL(pDxilVersionMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA); - IFTBOOL(pDxilVersionMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA); +// Load dxil version from metadata contained in pModule. +// Returns true and passes result through +// the dxil major/minor version params if valid. +// Returns false if metadata is missing or invalid. 
+bool DxilMDHelper::LoadDxilVersion(const Module *pModule, unsigned &Major, + unsigned &Minor) { + NamedMDNode *pDxilVersionMD = pModule->getNamedMetadata(kDxilVersionMDName); + IFRBOOL(pDxilVersionMD != nullptr, false); + IFRBOOL(pDxilVersionMD->getNumOperands() == 1, false); MDNode *pVersionMD = pDxilVersionMD->getOperand(0); - IFTBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, - DXC_E_INCORRECT_DXIL_METADATA); + IFRBOOL(pVersionMD->getNumOperands() == kDxilVersionNumFields, false); Major = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMajorIdx)); Minor = ConstMDToUint32(pVersionMD->getOperand(kDxilVersionMinorIdx)); + + return true; +} + +void DxilMDHelper::LoadDxilVersion(unsigned &Major, unsigned &Minor) { + IFTBOOL(LoadDxilVersion(m_pModule, Major, Minor), + DXC_E_INCORRECT_DXIL_METADATA); } // @@ -3099,6 +3110,13 @@ void DxilExtraPropertyHelper::EmitUAVProperties( DxilMDHelper::kDxilAtomic64UseTag, m_Ctx)); MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD((unsigned)true, m_Ctx)); } + // Whether resource is reordercoherent. 
+ if (DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 9) >= 0 && + UAV.IsReorderCoherent()) { + MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD( + DxilMDHelper::kDxilReorderCoherentTag, m_Ctx)); + MDVals.emplace_back(DxilMDHelper::BoolToConstMD(true, m_Ctx)); + } } void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, @@ -3136,6 +3154,9 @@ void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, case DxilMDHelper::kDxilAtomic64UseTag: UAV.SetHasAtomic64Use(DxilMDHelper::ConstMDToBool(MDO)); break; + case DxilMDHelper::kDxilReorderCoherentTag: + UAV.SetReorderCoherent(DxilMDHelper::ConstMDToBool(MDO)); + break; default: DXASSERT(false, "Unknown resource record tag"); m_bExtraMetadata = true; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b3e552da18..f614ba9d14 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -10,6 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilOperations.h" +#include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilInstructions.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/Support/Global.h" @@ -23,8 +24,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -using std::string; -using std::vector; namespace hlsl { @@ -41,2984 +40,2623 @@ import hctdb_instrhelp /* hctdb_instrhelp.get_oloads_props()*/ // OPCODE-OLOADS:BEGIN const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { - // OpCode OpCode name, OpCodeClass - // OpCodeClass name, void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj, function attribute - // Temporary, indexable, input, output registers void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::TempRegLoad, - "TempRegLoad", - OCC::TempRegLoad, - "tempRegLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TempRegStore, - "TempRegStore", - 
OCC::TempRegStore, - "tempRegStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::MinPrecXRegLoad, - "MinPrecXRegLoad", - OCC::MinPrecXRegLoad, - "minPrecXRegLoad", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::MinPrecXRegStore, - "MinPrecXRegStore", - OCC::MinPrecXRegStore, - "minPrecXRegStore", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::None, - }, - { - OC::LoadInput, - "LoadInput", - OCC::LoadInput, - "loadInput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StoreOutput, - "StoreOutput", - OCC::StoreOutput, - "storeOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Unary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FAbs, - "FAbs", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Saturate, - "Saturate", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNaN, - "IsNaN", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsInf, - "IsInf", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsFinite, - "IsFinite", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNormal, - "IsNormal", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, 
false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Cos, - "Cos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sin, - "Sin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Tan, - "Tan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Acos, - "Acos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Asin, - "Asin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Atan, - "Atan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hcos, - "Hcos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hsin, - "Hsin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Htan, - "Htan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Exp, - "Exp", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Frc, - "Frc", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Log, - "Log", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sqrt, - "Sqrt", - OCC::Unary, 
- "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Rsqrt, - "Rsqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary float - rounding void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Round_ne, - "Round_ne", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_ni, - "Round_ni", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_pi, - "Round_pi", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_z, - "Round_z", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfrev, - "Bfrev", - OCC::Unary, - "unary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Countbits, - "Countbits", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::FirstbitLo, - "FirstbitLo", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitHi, - "FirstbitHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, 
obj , function attribute - { - OC::FirstbitSHi, - "FirstbitSHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FMax, - "FMax", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FMin, - "FMin", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMax, - "IMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::IMin, - "IMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMax, - "UMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::UMin, - "UMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Binary int with two outputs void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IMul, - "IMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with two outputs void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UMul, - "UMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - 
false}, - Attribute::ReadNone, - }, - { - OC::UDiv, - "UDiv", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Binary uint with carry or borrow void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UAddc, - "UAddc", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::USubb, - "USubb", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary float void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::FMad, - "FMad", - OCC::Tertiary, - "tertiary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Fma, - "Fma", - OCC::Tertiary, - "tertiary", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMad, - "IMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMad, - "UMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Msad, - "Msad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Ibfe, - "Ibfe", - OCC::Tertiary, - "tertiary", - {false, 
false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Ubfe, - "Ubfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - - // Quaternary void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfi, - "Bfi", - OCC::Quaternary, - "quaternary", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::Dot2, - "Dot2", - OCC::Dot2, - "dot2", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot3, - "Dot3", - OCC::Dot3, - "dot3", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4, - "Dot4", - OCC::Dot4, - "dot4", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CreateHandle, - "CreateHandle", - OCC::CreateHandle, - "createHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoad, - "CBufferLoad", - OCC::CBufferLoad, - "cbufferLoad", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoadLegacy, - "CBufferLoadLegacy", - OCC::CBufferLoadLegacy, - "cbufferLoadLegacy", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Sample, - "Sample", - OCC::Sample, - "sample", - 
{false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleBias, - "SampleBias", - OCC::SampleBias, - "sampleBias", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleLevel, - "SampleLevel", - OCC::SampleLevel, - "sampleLevel", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleGrad, - "SampleGrad", - OCC::SampleGrad, - "sampleGrad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmp, - "SampleCmp", - OCC::SampleCmp, - "sampleCmp", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpLevelZero, - "SampleCmpLevelZero", - OCC::SampleCmpLevelZero, - "sampleCmpLevelZero", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureLoad, - "TextureLoad", - OCC::TextureLoad, - "textureLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureStore, - "TextureStore", - OCC::TextureStore, - "textureStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferLoad, - "BufferLoad", - OCC::BufferLoad, - "bufferLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::BufferStore, - "BufferStore", - OCC::BufferStore, - "bufferStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferUpdateCounter, - "BufferUpdateCounter", - OCC::BufferUpdateCounter, - "bufferUpdateCounter", - {true, false, false, false, 
false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CheckAccessFullyMapped, - "CheckAccessFullyMapped", - OCC::CheckAccessFullyMapped, - "checkAccessFullyMapped", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetDimensions, - "GetDimensions", - OCC::GetDimensions, - "getDimensions", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGather, - "TextureGather", - OCC::TextureGather, - "textureGather", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureGatherCmp, - "TextureGatherCmp", - OCC::TextureGatherCmp, - "textureGatherCmp", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Texture2DMSGetSamplePosition, - "Texture2DMSGetSamplePosition", - OCC::Texture2DMSGetSamplePosition, - "texture2DMSGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSamplePosition, - "RenderTargetGetSamplePosition", - OCC::RenderTargetGetSamplePosition, - "renderTargetGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSampleCount, - "RenderTargetGetSampleCount", - OCC::RenderTargetGetSampleCount, - "renderTargetGetSampleCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AtomicBinOp, - "AtomicBinOp", - 
OCC::AtomicBinOp, - "atomicBinOp", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::AtomicCompareExchange, - "AtomicCompareExchange", - OCC::AtomicCompareExchange, - "atomicCompareExchange", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::Barrier, - "Barrier", - OCC::Barrier, - "barrier", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CalculateLOD, - "CalculateLOD", - OCC::CalculateLOD, - "calculateLOD", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Discard, - "Discard", - OCC::Discard, - "discard", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DerivCoarseX, - "DerivCoarseX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivCoarseY, - "DerivCoarseY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineX, - "DerivFineX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineY, - "DerivFineY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::EvalSnapped, - "EvalSnapped", 
- OCC::EvalSnapped, - "evalSnapped", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalSampleIndex, - "EvalSampleIndex", - OCC::EvalSampleIndex, - "evalSampleIndex", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalCentroid, - "EvalCentroid", - OCC::EvalCentroid, - "evalCentroid", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SampleIndex, - "SampleIndex", - OCC::SampleIndex, - "sampleIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Coverage, - "Coverage", - OCC::Coverage, - "coverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InnerCoverage, - "InnerCoverage", - OCC::InnerCoverage, - "innerCoverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Compute/Mesh/Amplification/Node shader void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::ThreadId, - "ThreadId", - OCC::ThreadId, - "threadId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::GroupId, - "GroupId", - OCC::GroupId, - "groupId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ThreadIdInGroup, - "ThreadIdInGroup", - OCC::ThreadIdInGroup, - "threadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FlattenedThreadIdInGroup, - "FlattenedThreadIdInGroup", - OCC::FlattenedThreadIdInGroup, - "flattenedThreadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - 
}, - - // Geometry shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::EmitStream, - "EmitStream", - OCC::EmitStream, - "emitStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CutStream, - "CutStream", - OCC::CutStream, - "cutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitThenCutStream, - "EmitThenCutStream", - OCC::EmitThenCutStream, - "emitThenCutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GSInstanceID, - "GSInstanceID", - OCC::GSInstanceID, - "gsInstanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::MakeDouble, - "MakeDouble", - OCC::MakeDouble, - "makeDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SplitDouble, - "SplitDouble", - OCC::SplitDouble, - "splitDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain and hull shader void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LoadOutputControlPoint, - "LoadOutputControlPoint", - OCC::LoadOutputControlPoint, - "loadOutputControlPoint", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LoadPatchConstant, - "LoadPatchConstant", - OCC::LoadPatchConstant, - "loadPatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Domain shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DomainLocation, - "DomainLocation", - 
OCC::DomainLocation, - "domainLocation", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::StorePatchConstant, - "StorePatchConstant", - OCC::StorePatchConstant, - "storePatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::OutputControlPointID, - "OutputControlPointID", - OCC::OutputControlPointID, - "outputControlPointID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Hull, Domain and Geometry shaders void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveID, - "PrimitiveID", - OCC::PrimitiveID, - "primitiveID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Other void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::CycleCounterLegacy, - "CycleCounterLegacy", - OCC::CycleCounterLegacy, - "cycleCounterLegacy", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveIsFirstLane, - "WaveIsFirstLane", - OCC::WaveIsFirstLane, - "waveIsFirstLane", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveGetLaneIndex, - "WaveGetLaneIndex", - OCC::WaveGetLaneIndex, - "waveGetLaneIndex", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::WaveGetLaneCount, - "WaveGetLaneCount", - OCC::WaveGetLaneCount, - "waveGetLaneCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WaveAnyTrue, - 
"WaveAnyTrue", - OCC::WaveAnyTrue, - "waveAnyTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveAllTrue, - "WaveAllTrue", - OCC::WaveAllTrue, - "waveAllTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveActiveAllEqual, - "WaveActiveAllEqual", - OCC::WaveActiveAllEqual, - "waveActiveAllEqual", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBallot, - "WaveActiveBallot", - OCC::WaveActiveBallot, - "waveActiveBallot", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveReadLaneAt, - "WaveReadLaneAt", - OCC::WaveReadLaneAt, - "waveReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveReadLaneFirst, - "WaveReadLaneFirst", - OCC::WaveReadLaneFirst, - "waveReadLaneFirst", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveOp, - "WaveActiveOp", - OCC::WaveActiveOp, - "waveActiveOp", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBit, - "WaveActiveBit", - OCC::WaveActiveBit, - "waveActiveBit", - {false, false, false, false, false, true, true, true, true, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixOp, - "WavePrefixOp", - OCC::WavePrefixOp, - "wavePrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadReadLaneAt, - "QuadReadLaneAt", - OCC::QuadReadLaneAt, - "quadReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::QuadOp, - "QuadOp", - OCC::QuadOp, 
- "quadOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - - // Bitcasts with different sizes void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::BitcastI16toF16, - "BitcastI16toF16", - OCC::BitcastI16toF16, - "bitcastI16toF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF16toI16, - "BitcastF16toI16", - OCC::BitcastF16toI16, - "bitcastF16toI16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI32toF32, - "BitcastI32toF32", - OCC::BitcastI32toF32, - "bitcastI32toF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF32toI32, - "BitcastF32toI32", - OCC::BitcastF32toI32, - "bitcastF32toI32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI64toF64, - "BitcastI64toF64", - OCC::BitcastI64toF64, - "bitcastI64toF64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF64toI64, - "BitcastF64toI64", - OCC::BitcastF64toI64, - "bitcastF64toI64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Legacy floating-point void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LegacyF32ToF16, - "LegacyF32ToF16", - OCC::LegacyF32ToF16, - "legacyF32ToF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyF16ToF32, - "LegacyF16ToF32", - OCC::LegacyF16ToF32, - "legacyF16ToF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Double precision void, h, f, d, i1, i8, i16, i32, - // 
i64, udt, obj , function attribute - { - OC::LegacyDoubleToFloat, - "LegacyDoubleToFloat", - OCC::LegacyDoubleToFloat, - "legacyDoubleToFloat", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToSInt32, - "LegacyDoubleToSInt32", - OCC::LegacyDoubleToSInt32, - "legacyDoubleToSInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToUInt32, - "LegacyDoubleToUInt32", - OCC::LegacyDoubleToUInt32, - "legacyDoubleToUInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveAllBitCount, - "WaveAllBitCount", - OCC::WaveAllOp, - "waveAllOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixBitCount, - "WavePrefixBitCount", - OCC::WavePrefixOp, - "wavePrefixOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::AttributeAtVertex, - "AttributeAtVertex", - OCC::AttributeAtVertex, - "attributeAtVertex", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Graphics shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ViewID, - "ViewID", - OCC::ViewID, - "viewID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::RawBufferLoad, - "RawBufferLoad", - OCC::RawBufferLoad, - "rawBufferLoad", - {false, true, true, true, false, false, true, true, true, false, false}, - 
Attribute::ReadOnly, - }, - { - OC::RawBufferStore, - "RawBufferStore", - OCC::RawBufferStore, - "rawBufferStore", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::None, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::InstanceID, - "InstanceID", - OCC::InstanceID, - "instanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InstanceIndex, - "InstanceIndex", - OCC::InstanceIndex, - "instanceIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing hit uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::HitKind, - "HitKind", - OCC::HitKind, - "hitKind", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Raytracing uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::RayFlags, - "RayFlags", - OCC::RayFlags, - "rayFlags", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Dispatch Arguments void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::DispatchRaysIndex, - "DispatchRaysIndex", - OCC::DispatchRaysIndex, - "dispatchRaysIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DispatchRaysDimensions, - "DispatchRaysDimensions", - OCC::DispatchRaysDimensions, - "dispatchRaysDimensions", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Vectors void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::WorldRayOrigin, - "WorldRayOrigin", - OCC::WorldRayOrigin, - 
"worldRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldRayDirection, - "WorldRayDirection", - OCC::WorldRayDirection, - "worldRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray object space Vectors void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::ObjectRayOrigin, - "ObjectRayOrigin", - OCC::ObjectRayOrigin, - "objectRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ObjectRayDirection, - "ObjectRayDirection", - OCC::ObjectRayDirection, - "objectRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Ray Transforms void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ObjectToWorld, - "ObjectToWorld", - OCC::ObjectToWorld, - "objectToWorld", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldToObject, - "WorldToObject", - OCC::WorldToObject, - "worldToObject", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // RayT void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::RayTMin, - "RayTMin", - OCC::RayTMin, - "rayTMin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::RayTCurrent, - "RayTCurrent", - OCC::RayTCurrent, - "rayTCurrent", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // AnyHit Terminals void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::IgnoreHit, - "IgnoreHit", - OCC::IgnoreHit, - "ignoreHit", - {true, false, false, false, false, false, 
false, false, false, false, - false}, - Attribute::NoReturn, - }, - { - OC::AcceptHitAndEndSearch, - "AcceptHitAndEndSearch", - OCC::AcceptHitAndEndSearch, - "acceptHitAndEndSearch", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - - // Indirect Shader Invocation void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::TraceRay, - "TraceRay", - OCC::TraceRay, - "traceRay", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::ReportHit, - "ReportHit", - OCC::ReportHit, - "reportHit", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::CallShader, - "CallShader", - OCC::CallShader, - "callShader", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Library create handle from resource struct (like HL intrinsic) void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::CreateHandleForLib, - "CreateHandleForLib", - OCC::CreateHandleForLib, - "createHandleForLib", - {false, false, false, false, false, false, false, false, false, false, - true}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveIndex, - "PrimitiveIndex", - OCC::PrimitiveIndex, - "primitiveIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Dot product with accumulate void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Dot2AddHalf, - "Dot2AddHalf", - OCC::Dot2AddHalf, - "dot2AddHalf", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddI8Packed, - "Dot4AddI8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - 
{false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddU8Packed, - "Dot4AddU8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveMatch, - "WaveMatch", - OCC::WaveMatch, - "waveMatch", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixOp, - "WaveMultiPrefixOp", - OCC::WaveMultiPrefixOp, - "waveMultiPrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixBitCount, - "WaveMultiPrefixBitCount", - OCC::WaveMultiPrefixBitCount, - "waveMultiPrefixBitCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Mesh shader instructions void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::SetMeshOutputCounts, - "SetMeshOutputCounts", - OCC::SetMeshOutputCounts, - "setMeshOutputCounts", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitIndices, - "EmitIndices", - OCC::EmitIndices, - "emitIndices", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetMeshPayload, - "GetMeshPayload", - OCC::GetMeshPayload, - "getMeshPayload", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadOnly, - }, - { - OC::StoreVertexOutput, - "StoreVertexOutput", - OCC::StoreVertexOutput, - "storeVertexOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::StorePrimitiveOutput, - "StorePrimitiveOutput", - OCC::StorePrimitiveOutput, - 
"storePrimitiveOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // Amplification shader instructions void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::DispatchMesh, - "DispatchMesh", - OCC::DispatchMesh, - "dispatchMesh", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - - // Sampler Feedback void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::WriteSamplerFeedback, - "WriteSamplerFeedback", - OCC::WriteSamplerFeedback, - "writeSamplerFeedback", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackBias, - "WriteSamplerFeedbackBias", - OCC::WriteSamplerFeedbackBias, - "writeSamplerFeedbackBias", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackLevel, - "WriteSamplerFeedbackLevel", - OCC::WriteSamplerFeedbackLevel, - "writeSamplerFeedbackLevel", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackGrad, - "WriteSamplerFeedbackGrad", - OCC::WriteSamplerFeedbackGrad, - "writeSamplerFeedbackGrad", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery, - "AllocateRayQuery", - OCC::AllocateRayQuery, - "allocateRayQuery", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_TraceRayInline, - "RayQuery_TraceRayInline", - OCC::RayQuery_TraceRayInline, - "rayQuery_TraceRayInline", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - 
OC::RayQuery_Proceed, - "RayQuery_Proceed", - OCC::RayQuery_Proceed, - "rayQuery_Proceed", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Abort, - "RayQuery_Abort", - OCC::RayQuery_Abort, - "rayQuery_Abort", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitNonOpaqueTriangleHit, - "RayQuery_CommitNonOpaqueTriangleHit", - OCC::RayQuery_CommitNonOpaqueTriangleHit, - "rayQuery_CommitNonOpaqueTriangleHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitProceduralPrimitiveHit, - "RayQuery_CommitProceduralPrimitiveHit", - OCC::RayQuery_CommitProceduralPrimitiveHit, - "rayQuery_CommitProceduralPrimitiveHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommittedStatus, - "RayQuery_CommittedStatus", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateType, - "RayQuery_CandidateType", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectToWorld3x4, - "RayQuery_CandidateObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateWorldToObject3x4, - "RayQuery_CandidateWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectToWorld3x4, - 
"RayQuery_CommittedObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedWorldToObject3x4, - "RayQuery_CommittedWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, - "RayQuery_CandidateProceduralPrimitiveNonOpaque", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleFrontFace, - "RayQuery_CandidateTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleFrontFace, - "RayQuery_CommittedTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleBarycentrics, - "RayQuery_CandidateTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleBarycentrics, - "RayQuery_CommittedTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayFlags, - "RayQuery_RayFlags", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - 
OC::RayQuery_WorldRayOrigin, - "RayQuery_WorldRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayDirection, - "RayQuery_WorldRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayTMin, - "RayQuery_RayTMin", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleRayT, - "RayQuery_CandidateTriangleRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedRayT, - "RayQuery_CommittedRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceIndex, - "RayQuery_CandidateInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceID, - "RayQuery_CandidateInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateGeometryIndex, - "RayQuery_CandidateGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidatePrimitiveIndex, - "RayQuery_CandidatePrimitiveIndex", - OCC::RayQuery_StateScalar, - 
"rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayOrigin, - "RayQuery_CandidateObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayDirection, - "RayQuery_CandidateObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceIndex, - "RayQuery_CommittedInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceID, - "RayQuery_CommittedInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedGeometryIndex, - "RayQuery_CommittedGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedPrimitiveIndex, - "RayQuery_CommittedPrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayOrigin, - "RayQuery_CommittedObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayDirection, - "RayQuery_CommittedObjectRayDirection", - OCC::RayQuery_StateVector, - 
"rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Raytracing object space uint System Values, raytracing tier 1.1 void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::GeometryIndex, - "GeometryIndex", - OCC::GeometryIndex, - "geometryIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, - "RayQuery_CandidateInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, - "RayQuery_CommittedInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - - // Get handle from heap void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AnnotateHandle, - "AnnotateHandle", - OCC::AnnotateHandle, - "annotateHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromBinding, - "CreateHandleFromBinding", - OCC::CreateHandleFromBinding, - "createHandleFromBinding", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromHeap, - "CreateHandleFromHeap", - OCC::CreateHandleFromHeap, - "createHandleFromHeap", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Unpacking intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , 
function attribute - { - OC::Unpack4x8, - "Unpack4x8", - OCC::Unpack4x8, - "unpack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Packing intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Pack4x8, - "Pack4x8", - OCC::Pack4x8, - "pack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Helper Lanes void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IsHelperLane, - "IsHelperLane", - OCC::IsHelperLane, - "isHelperLane", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadVote, - "QuadVote", - OCC::QuadVote, - "quadVote", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGatherRaw, - "TextureGatherRaw", - OCC::TextureGatherRaw, - "textureGatherRaw", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadOnly, - }, - - // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpLevel, - "SampleCmpLevel", - OCC::SampleCmpLevel, - "sampleCmpLevel", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureStoreSample, - "TextureStoreSample", - OCC::TextureStoreSample, - "textureStoreSample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - 
OC::Reserved0, - "Reserved0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved1, - "Reserved1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved2, - "Reserved2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved3, - "Reserved3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved4, - "Reserved4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved5, - "Reserved5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved6, - "Reserved6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved7, - "Reserved7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved8, - "Reserved8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved9, - "Reserved9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved10, - "Reserved10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved11, - "Reserved11", - OCC::Reserved, - "reserved", - {true, false, false, false, 
false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::AllocateNodeOutputRecords, - "AllocateNodeOutputRecords", - OCC::AllocateNodeOutputRecords, - "allocateNodeOutputRecords", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Get Pointer to Node Record in Address Space 6 void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::GetNodeRecordPtr, - "GetNodeRecordPtr", - OCC::GetNodeRecordPtr, - "getNodeRecordPtr", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IncrementOutputCount, - "IncrementOutputCount", - OCC::IncrementOutputCount, - "incrementOutputCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::OutputComplete, - "OutputComplete", - OCC::OutputComplete, - "outputComplete", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetInputRecordCount, - "GetInputRecordCount", - OCC::GetInputRecordCount, - "getInputRecordCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::FinishedCrossGroupSharing, - "FinishedCrossGroupSharing", - OCC::FinishedCrossGroupSharing, - "finishedCrossGroupSharing", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::BarrierByMemoryType, - "BarrierByMemoryType", - OCC::BarrierByMemoryType, - "barrierByMemoryType", - {true, false, false, false, false, false, false, false, 
false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByMemoryHandle, - "BarrierByMemoryHandle", - OCC::BarrierByMemoryHandle, - "barrierByMemoryHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByNodeRecordHandle, - "BarrierByNodeRecordHandle", - OCC::BarrierByNodeRecordHandle, - "barrierByNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - - // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::CreateNodeOutputHandle, - "CreateNodeOutputHandle", - OCC::createNodeOutputHandle, - "createNodeOutputHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IndexNodeHandle, - "IndexNodeHandle", - OCC::IndexNodeHandle, - "indexNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeHandle, - "AnnotateNodeHandle", - OCC::AnnotateNodeHandle, - "annotateNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateNodeInputRecordHandle, - "CreateNodeInputRecordHandle", - OCC::CreateNodeInputRecordHandle, - "createNodeInputRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeRecordHandle, - "AnnotateNodeRecordHandle", - OCC::AnnotateNodeRecordHandle, - "annotateNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - - // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::NodeOutputIsValid, - "NodeOutputIsValid", - OCC::NodeOutputIsValid, - "nodeOutputIsValid", - {true, false, 
false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetRemainingRecursionLevels, - "GetRemainingRecursionLevels", - OCC::GetRemainingRecursionLevels, - "getRemainingRecursionLevels", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Comparison Samples void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpGrad, - "SampleCmpGrad", - OCC::SampleCmpGrad, - "sampleCmpGrad", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpBias, - "SampleCmpBias", - OCC::SampleCmpBias, - "sampleCmpBias", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - - // Extended Command Information void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::StartVertexLocation, - "StartVertexLocation", - OCC::StartVertexLocation, - "startVertexLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StartInstanceLocation, - "StartInstanceLocation", - OCC::StartInstanceLocation, - "startInstanceLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - - // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery2, - "AllocateRayQuery2", - OCC::AllocateRayQuery2, - "allocateRayQuery2", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedA0, - "ReservedA0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA1, - "ReservedA1", - OCC::Reserved, - 
"reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA2, - "ReservedA2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB3, - "ReservedB3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB4, - "ReservedB4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, 
false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB13, - "ReservedB13", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB14, - "ReservedB14", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB16, - "ReservedB16", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { 
- OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB28, - "ReservedB28", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB29, - "ReservedB29", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB30, - "ReservedB30", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC0, - "ReservedC0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC1, - "ReservedC1", - OCC::Reserved, - 
"reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC2, - "ReservedC2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC3, - "ReservedC3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC4, - "ReservedC4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC5, - "ReservedC5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC6, - "ReservedC6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC7, - "ReservedC7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC8, - "ReservedC8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC9, - "ReservedC9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // Temporary, indexable, input, output registers + {OC::TempRegLoad, + "TempRegLoad", + OCC::TempRegLoad, + "tempRegLoad", + Attribute::ReadOnly, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::TempRegStore, + "TempRegStore", + OCC::TempRegStore, + "tempRegStore", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::MinPrecXRegLoad, + "MinPrecXRegLoad", + OCC::MinPrecXRegLoad, + "minPrecXRegLoad", + Attribute::ReadOnly, + 1, + {{0x21}}, + 
{{0x0}}}, // Overloads: hw + {OC::MinPrecXRegStore, + "MinPrecXRegStore", + OCC::MinPrecXRegStore, + "minPrecXRegStore", + Attribute::None, + 1, + {{0x21}}, + {{0x0}}}, // Overloads: hw + {OC::LoadInput, + "LoadInput", + OCC::LoadInput, + "loadInput", + Attribute::ReadNone, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + {OC::StoreOutput, + "StoreOutput", + OCC::StoreOutput, + "storeOutput", + Attribute::None, + 1, + {{0x63}}, + {{0x0}}}, // Overloads: hfwi + + // Unary float + {OC::FAbs, + "FAbs", + OCC::Unary, + "unary", + Attribute::ReadNone, + 1, + {{0x407}}, + {{0x7}}}, // Overloads: hfdgetTypeID(); switch (T) { case Type::VoidTyID: - return 0; + return TS_Invalid; case Type::HalfTyID: - return 1; + return TS_F16; case Type::FloatTyID: - return 2; + return TS_F32; case Type::DoubleTyID: - return 3; + return TS_F64; case Type::IntegerTyID: { IntegerType *pIT = dyn_cast(pType); unsigned Bits = pIT->getBitWidth(); switch (Bits) { case 1: - return 4; + return TS_I1; case 8: - return 5; + return TS_I8; case 16: - return 6; + return TS_I16; case 32: - return 7; + return TS_I32; case 64: - return 8; + return TS_I64; } llvm_unreachable("Invalid Bits size"); + return TS_Invalid; } case Type::PointerTyID: { pType = cast(pType)->getElementType(); if (pType->isStructTy()) - return kUserDefineTypeSlot; + return TS_UDT; DXASSERT(!pType->isPointerTy(), "pointer-to-pointer type unsupported"); return GetTypeSlot(pType); } case Type::StructTyID: - return kObjectTypeSlot; + // Named struct value (not pointer) indicates a built-in object type. + // Anonymous struct value is used to wrap multi-overload dimensions. 
+ if (cast(pType)->hasName()) + return TS_Object; + else + return TS_Extended; + case Type::VectorTyID: + return TS_Vector; default: break; } - return UINT_MAX; + return TS_Invalid; } const char *OP::GetOverloadTypeName(unsigned TypeSlot) { - DXASSERT(TypeSlot < kUserDefineTypeSlot, "otherwise caller passed OOB index"); + DXASSERT(TypeSlot < TS_BasicCount, "otherwise caller passed OOB index"); return m_OverloadTypeName[TypeSlot]; } -llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) { +StringRef OP::GetTypeName(Type *Ty, SmallVectorImpl &Storage) { + DXASSERT(!Ty->isVoidTy(), "must not pass void type here"); unsigned TypeSlot = OP::GetTypeSlot(Ty); - if (TypeSlot < kUserDefineTypeSlot) { + if (TypeSlot < TS_BasicCount) { return GetOverloadTypeName(TypeSlot); - } else if (TypeSlot == kUserDefineTypeSlot) { + } else if (TypeSlot == TS_UDT) { if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); StructType *ST = cast(Ty); return ST->getStructName(); - } else if (TypeSlot == kObjectTypeSlot) { + } else if (TypeSlot == TS_Object) { StructType *ST = cast(Ty); return ST->getStructName(); + } else if (TypeSlot == TS_Vector) { + VectorType *VecTy = cast(Ty); + return (Twine("v") + Twine(VecTy->getNumElements()) + + Twine( + GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + } else if (TypeSlot == TS_Extended) { + DXASSERT(isa(Ty), + "otherwise, extended overload type not wrapped in struct type."); + StructType *ST = cast(Ty); + DXASSERT(ST->getNumElements() <= DXIL::kDxilMaxOloadDims, + "otherwise, extended overload has too many dimensions."); + // Iterate extended slots, recurse, separate with '.' 
+ raw_svector_ostream OS(Storage); + for (unsigned I = 0; I < ST->getNumElements(); ++I) { + if (I > 0) + OS << "."; + SmallVector TempStr; + OS << GetTypeName(ST->getElementType(I), TempStr); + } + return OS.str(); } else { - raw_string_ostream os(str); - Ty->print(os); - os.flush(); - return str; + raw_svector_ostream OS(Storage); + Ty->print(OS); + return OS.str(); } } -llvm::StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, - std::string &funcNameStorage) { +StringRef OP::ConstructOverloadName(Type *Ty, DXIL::OpCode opCode, + SmallVectorImpl &Storage) { if (Ty == Type::getVoidTy(Ty->getContext())) { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))).str(); + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode))) + .toStringRef(Storage); } else { - funcNameStorage = - (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + - GetTypeName(Ty, funcNameStorage)) - .str(); + llvm::SmallVector TempStr; + return (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(opCode)) + "." + + GetTypeName(Ty, TempStr)) + .toStringRef(Storage); } - return funcNameStorage; } const char *OP::GetOpCodeName(OpCode opCode) { @@ -3138,13 +2804,41 @@ llvm::Attribute::AttrKind OP::GetMemAccessAttr(OpCode opCode) { } bool OP::IsOverloadLegal(OpCode opCode, Type *pType) { - if (!pType) + if (static_cast(opCode) >= + static_cast(OpCode::NumOpCodes)) return false; - if (opCode == OpCode::NumOpCodes) + if (!pType) return false; - unsigned TypeSlot = GetTypeSlot(pType); - return TypeSlot != UINT_MAX && - m_OpCodeProps[(unsigned)opCode].bAllowOverload[TypeSlot]; + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + + if (OpProps.NumOverloadDims == 0) + return pType->isVoidTy(); + + // Normalize 1+ overload dimensions into array. + Type *Types[DXIL::kDxilMaxOloadDims] = {pType}; + if (OpProps.NumOverloadDims > 1) { + StructType *ST = dyn_cast(pType); + // Make sure multi-overload is well-formed. 
+ if (!ST || ST->hasName() || ST->getNumElements() != OpProps.NumOverloadDims) + return false; + for (unsigned I = 0; I < ST->getNumElements(); ++I) + Types[I] = ST->getElementType(I); + } + + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) { + Type *Ty = Types[I]; + unsigned TypeSlot = GetTypeSlot(Ty); + if (!OpProps.AllowedOverloads[I][TypeSlot]) + return false; + if (TypeSlot == TS_Vector) { + unsigned EltTypeSlot = + GetTypeSlot(cast(Ty)->getElementType()); + if (!OpProps.AllowedVectorElements[I][EltTypeSlot]) + return false; + } + } + + return true; } bool OP::CheckOpCodeTable() { @@ -3168,41 +2862,6 @@ bool OP::IsDxilOpFunc(const llvm::Function *F) { return IsDxilOpFuncName(F->getName()); } -bool OP::IsDxilOpTypeName(StringRef name) { - return name.startswith(m_TypePrefix) || name.startswith(m_MatrixTypePrefix); -} - -bool OP::IsDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - return IsDxilOpTypeName(Name); -} - -bool OP::IsDupDxilOpType(llvm::StructType *ST) { - if (!ST->hasName()) - return false; - StringRef Name = ST->getName(); - if (!IsDxilOpTypeName(Name)) - return false; - size_t DotPos = Name.rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || Name.back() == '.' 
|| - !isdigit(static_cast(Name[DotPos + 1]))) - return false; - return true; -} - -StructType *OP::GetOriginalDxilOpType(llvm::StructType *ST, llvm::Module &M) { - DXASSERT(IsDupDxilOpType(ST), "else should not call GetOriginalDxilOpType"); - StringRef Name = ST->getName(); - size_t DotPos = Name.rfind('.'); - StructType *OriginalST = M.getTypeByName(Name.substr(0, DotPos)); - DXASSERT(OriginalST, "else name collison without original type"); - DXASSERT(ST->isLayoutIdentical(OriginalST), - "else invalid layout for dxil types"); - return OriginalST; -} - bool OP::IsDxilOpFuncCallInst(const llvm::Instruction *I) { const CallInst *CI = dyn_cast(I); if (CI == nullptr) @@ -3292,6 +2951,12 @@ bool OP::IsDxilOpBarrier(OpCode C) { // OPCODE-BARRIER:END } +bool OP::IsDxilOpExtendedOverload(OpCode C) { + if (C >= OpCode::NumOpCodes) + return false; + return m_OpCodeProps[static_cast(C)].NumOverloadDims > 1; +} + static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags, unsigned allowedMask) { // If the memory type is AllMemory, masking inapplicable flags is allowed. 
@@ -3360,6 +3025,30 @@ bool OP::BarrierRequiresNode(const llvm::CallInst *CI) { } } +bool OP::BarrierRequiresReorder(const llvm::CallInst *CI) { + OpCode Opcode = OP::GetDxilOpFuncCallInst(CI); + switch (Opcode) { + case OpCode::BarrierByMemoryType: { + DxilInst_BarrierByMemoryType Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + case OpCode::BarrierByMemoryHandle: { + DxilInst_BarrierByMemoryHandle Barrier(const_cast(CI)); + if (!isa(Barrier.get_SemanticFlags())) + return false; + unsigned SemanticFlags = Barrier.get_SemanticFlags_val(); + return (SemanticFlags & static_cast( + DXIL::BarrierSemanticFlag::ReorderScope)) != 0U; + } + default: + return false; + } +} + DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { OpCode opcode = OP::GetDxilOpFuncCallInst(CI); switch (opcode) { @@ -3382,6 +3071,12 @@ DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) { semanticFlags = barrier.get_SemanticFlags_val(); } + // Disallow SM6.9+ semantic flags. + if (semanticFlags & + ~static_cast(DXIL::BarrierSemanticFlag::LegacyFlags)) { + return DXIL::BarrierMode::Invalid; + } + // Mask to legacy flags, if allowed. 
memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed( memoryTypeFlags, (unsigned)DXIL::MemoryTypeFlag::LegacyFlags); @@ -3744,10 +3439,38 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, } return; } - // Instructions: AllocateRayQuery2=258 - if (op == 258) { + // Instructions: AllocateRayQuery2=258, RawBufferVectorLoad=303, + // RawBufferVectorStore=304 + if (op == 258 || (303 <= op && op <= 304)) { + major = 6; + minor = 9; + return; + } + // Instructions: MaybeReorderThread=268 + if (op == 268) { major = 6; minor = 9; + mask = SFLAG(Library) | SFLAG(RayGeneration); + return; + } + // Instructions: HitObject_TraceRay=262, HitObject_FromRayQuery=263, + // HitObject_FromRayQueryWithAttrs=264, HitObject_MakeMiss=265, + // HitObject_MakeNop=266, HitObject_Invoke=267, HitObject_IsMiss=269, + // HitObject_IsHit=270, HitObject_IsNop=271, HitObject_RayFlags=272, + // HitObject_RayTMin=273, HitObject_RayTCurrent=274, + // HitObject_WorldRayOrigin=275, HitObject_WorldRayDirection=276, + // HitObject_ObjectRayOrigin=277, HitObject_ObjectRayDirection=278, + // HitObject_ObjectToWorld3x4=279, HitObject_WorldToObject3x4=280, + // HitObject_GeometryIndex=281, HitObject_InstanceIndex=282, + // HitObject_InstanceID=283, HitObject_PrimitiveIndex=284, + // HitObject_HitKind=285, HitObject_ShaderTableIndex=286, + // HitObject_SetShaderTableIndex=287, + // HitObject_LoadLocalRootTableConstant=288, HitObject_Attributes=289 + if ((262 <= op && op <= 267) || (269 <= op && op <= 289)) { + major = 6; + minor = 9; + mask = + SFLAG(Library) | SFLAG(RayGeneration) | SFLAG(ClosestHit) | SFLAG(Miss); return; } // OPCODE-SMMASK:END @@ -3794,10 +3517,17 @@ void OP::GetMinShaderModelAndMask(const llvm::CallInst *CI, minor = 8; } } + if (BarrierRequiresReorder(CI)) { + major = 6; + minor = 9; + mask &= SFLAG(Library) | SFLAG(RayGeneration); + return; + } if (BarrierRequiresNode(CI)) { mask &= SFLAG(Library) | SFLAG(Node); return; - } else if (BarrierRequiresGroup(CI)) { + } + if 
(BarrierRequiresGroup(CI)) { mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Node); return; @@ -3851,6 +3581,8 @@ OP::OP(LLVMContext &Ctx, Module *pModule) m_pHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.Handle", pModule); + m_pHitObjectType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), + "dx.types.HitObject", pModule); m_pNodeHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.NodeHandle", pModule); m_pNodeRecordHandleType = GetOrCreateStructType( @@ -3930,13 +3662,12 @@ void OP::FixOverloadNames() { if (F.isDeclaration() && OP::IsDxilOpFunc(&F) && !F.user_empty()) { CallInst *CI = cast(*F.user_begin()); DXIL::OpCode opCode = OP::GetDxilOpFuncCallInst(CI); + if (!MayHaveNonCanonicalOverload(opCode)) + continue; llvm::Type *Ty = OP::GetOverloadType(opCode, &F); if (!OP::IsOverloadLegal(opCode, Ty)) continue; - if (!isa(Ty) && !isa(Ty)) - continue; - - std::string funcName; + SmallVector funcName; if (OP::ConstructOverloadName(Ty, opCode, funcName) .compare(F.getName()) != 0) F.setName(funcName); @@ -3949,11 +3680,54 @@ void OP::UpdateCache(OpCodeClass opClass, Type *Ty, llvm::Function *F) { m_FunctionToOpClass[F] = opClass; } +bool OP::MayHaveNonCanonicalOverload(OpCode OC) { + if (OC >= OpCode::NumOpCodes) + return false; + const unsigned CheckMask = (1 << TS_UDT) | (1 << TS_Object); + auto &OpProps = m_OpCodeProps[static_cast(OC)]; + for (unsigned I = 0; I < OpProps.NumOverloadDims; ++I) + if ((CheckMask & OpProps.AllowedOverloads[I].SlotMask) != 0) + return true; + return false; +} + +Function *OP::GetOpFunc(OpCode OC, ArrayRef OverloadTypes) { + if (OC >= OpCode::NumOpCodes) + return nullptr; + if (OverloadTypes.size() != + m_OpCodeProps[static_cast(OC)].NumOverloadDims) { + llvm_unreachable("incorrect overload dimensions"); + return nullptr; + } + if (OverloadTypes.size() == 0) { + return GetOpFunc(OC, Type::getVoidTy(m_Ctx)); + } else if 
(OverloadTypes.size() == 1) { + return GetOpFunc(OC, OverloadTypes[0]); + } + return GetOpFunc(OC, GetExtendedOverloadType(OverloadTypes)); +} + Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { - if (opCode == OpCode::NumOpCodes) + if (opCode >= OpCode::NumOpCodes) return nullptr; if (!pOverloadType) return nullptr; + + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; + if (IsDxilOpExtendedOverload(opCode)) { + // Make sure pOverloadType is well formed for an extended overload. + StructType *ST = dyn_cast(pOverloadType); + DXASSERT(ST != nullptr, + "otherwise, extended overload type is not a struct"); + if (ST == nullptr) + return nullptr; + bool EltCountValid = ST->getNumElements() == OpProps.NumOverloadDims; + DXASSERT(EltCountValid, + "otherwise, incorrect type count for extended overload."); + if (!EltCountValid) + return nullptr; + } + // Illegal overloads are generated and eliminated by DXIL op constant // evaluation for a number of cases where a double overload of an HL intrinsic // that otherwise does not support double is used for literal values, when @@ -3961,7 +3735,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Illegal overloads of DXIL intrinsics may survive through to final DXIL, // but these will be caught by the validator, and this is not a regression. 
- OpCodeClass opClass = m_OpCodeProps[(unsigned)opCode].opCodeClass; + OpCodeClass opClass = OpProps.opCodeClass; Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[pOverloadType]; if (F != nullptr) { @@ -3969,7 +3743,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - vector ArgTypes; // RetType is ArgTypes[0] + SmallVector ArgTypes; // RetType is ArgTypes[0] Type *pETy = pOverloadType; Type *pRes = GetHandleType(); Type *pNodeHandle = GetNodeHandleType(); @@ -3993,6 +3767,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { Type *pF64 = Type::getDoubleTy(m_Ctx); Type *pSDT = GetSplitDoubleType(); // Split double type. Type *p4I32 = GetFourI32Type(); // 4 i32s in a struct. + Type *pHit = GetHitObjectType(); Type *udt = pOverloadType; Type *obj = pOverloadType; @@ -4004,7 +3779,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define A(_x) ArgTypes.emplace_back(_x) #define RRT(_y) A(GetResRetType(_y)) #define CBRT(_y) A(GetCBufferRetType(_y)) -#define VEC4(_y) A(GetVectorType(4, _y)) +#define VEC4(_y) A(GetStructVectorType(4, _y)) + +// Extended Overload types are wrapped in an anonymous struct +#define EXT(_y) A(cast(pOverloadType)->getElementType(_y)) /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode @@ -5859,118 +5637,188 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; - case OpCode::ReservedB0: - A(pV); + + // Shader Execution Reordering + case OpCode::HitObject_TraceRay: + A(pHit); + A(pI32); + A(pRes); A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(udt); break; - case OpCode::ReservedB1: - A(pV); + case OpCode::HitObject_FromRayQuery: + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB2: - A(pV); + case OpCode::HitObject_FromRayQueryWithAttrs: + A(pHit); + A(pI32); A(pI32); + A(pI32); + A(udt); break; - case 
OpCode::ReservedB3: - A(pV); + case OpCode::HitObject_MakeMiss: + A(pHit); A(pI32); + A(pI32); + A(pI32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); + A(pF32); break; - case OpCode::ReservedB4: - A(pV); + case OpCode::HitObject_MakeNop: + A(pHit); A(pI32); break; - case OpCode::ReservedB5: + case OpCode::HitObject_Invoke: A(pV); A(pI32); + A(pHit); + A(udt); break; - case OpCode::ReservedB6: + case OpCode::MaybeReorderThread: A(pV); A(pI32); + A(pHit); + A(pI32); + A(pI32); break; - case OpCode::ReservedB7: - A(pV); + case OpCode::HitObject_IsMiss: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB8: - A(pV); + case OpCode::HitObject_IsHit: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB9: - A(pV); + case OpCode::HitObject_IsNop: + A(pI1); A(pI32); + A(pHit); break; - case OpCode::ReservedB10: - A(pV); + case OpCode::HitObject_RayFlags: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB11: - A(pV); + case OpCode::HitObject_RayTMin: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB12: - A(pV); + case OpCode::HitObject_RayTCurrent: + A(pF32); A(pI32); + A(pHit); break; - case OpCode::ReservedB13: - A(pV); + case OpCode::HitObject_WorldRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB14: - A(pV); + case OpCode::HitObject_WorldRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB15: - A(pV); + case OpCode::HitObject_ObjectRayOrigin: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB16: - A(pV); + case OpCode::HitObject_ObjectRayDirection: + A(pF32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB17: - A(pV); + case OpCode::HitObject_ObjectToWorld3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB18: - A(pV); + case OpCode::HitObject_WorldToObject3x4: + A(pF32); + A(pI32); + A(pHit); + A(pI32); A(pI32); break; - case OpCode::ReservedB19: - 
A(pV); + case OpCode::HitObject_GeometryIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB20: - A(pV); + case OpCode::HitObject_InstanceIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB21: - A(pV); + case OpCode::HitObject_InstanceID: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB22: - A(pV); + case OpCode::HitObject_PrimitiveIndex: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB23: - A(pV); + case OpCode::HitObject_HitKind: + A(pI32); A(pI32); + A(pHit); break; - case OpCode::ReservedB24: - A(pV); + case OpCode::HitObject_ShaderTableIndex: A(pI32); + A(pI32); + A(pHit); break; - case OpCode::ReservedB25: - A(pV); + case OpCode::HitObject_SetShaderTableIndex: + A(pHit); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB26: - A(pV); + case OpCode::HitObject_LoadLocalRootTableConstant: + A(pI32); + A(pI32); + A(pHit); A(pI32); break; - case OpCode::ReservedB27: + case OpCode::HitObject_Attributes: A(pV); A(pI32); + A(pHit); + A(udt); break; + + // case OpCode::ReservedB28: A(pV); A(pI32); @@ -6023,6 +5871,25 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; + + // Resources + case OpCode::RawBufferVectorLoad: + RRT(pETy); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pI32); + break; + case OpCode::RawBufferVectorStore: + A(pV); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pETy); + A(pI32); + break; // OPCODE-OLOAD-FUNCS:END default: DXASSERT(false, "otherwise unhandled case"); @@ -6036,14 +5903,15 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { pFT = FunctionType::get( ArgTypes[0], ArrayRef(&ArgTypes[1], ArgTypes.size() - 1), false); - std::string funcName; - ConstructOverloadName(pOverloadType, opCode, funcName); + SmallVector FuncStorage; + StringRef FuncName = + ConstructOverloadName(pOverloadType, opCode, FuncStorage); // Try to find existing function with the same name in the module. 
// This needs to happen after the switch statement that constructs arguments // and return values to ensure that ResRetType is constructed in the // RefreshCache case. - if (Function *existF = m_pModule->getFunction(funcName)) { + if (Function *existF = m_pModule->getFunction(FuncName)) { if (existF->getFunctionType() != pFT) return nullptr; F = existF; @@ -6051,13 +5919,13 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { return F; } - F = cast(m_pModule->getOrInsertFunction(funcName, pFT)); + F = cast(m_pModule->getOrInsertFunction(FuncName, pFT)); UpdateCache(opClass, pOverloadType, F); F->setCallingConv(CallingConv::C); F->addFnAttr(Attribute::NoUnwind); - if (m_OpCodeProps[(unsigned)opCode].FuncAttr != Attribute::None) - F->addFnAttr(m_OpCodeProps[(unsigned)opCode].FuncAttr); + if (OpProps.FuncAttr != Attribute::None) + F->addFnAttr(OpProps.FuncAttr); return F; } @@ -6160,6 +6028,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TempRegStore: case OpCode::CallShader: case OpCode::Pack4x8: + case OpCode::HitObject_Invoke: + case OpCode::HitObject_Attributes: if (FT->getNumParams() <= 2) return nullptr; return FT->getParamType(2); @@ -6171,6 +6041,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::StoreVertexOutput: case OpCode::StorePrimitiveOutput: case OpCode::DispatchMesh: + case OpCode::RawBufferVectorStore: if (FT->getNumParams() <= 4) return nullptr; return FT->getParamType(4); @@ -6199,10 +6070,12 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { return nullptr; return FT->getParamType(5); case OpCode::TraceRay: + case OpCode::HitObject_TraceRay: if (FT->getNumParams() <= 15) return nullptr; return FT->getParamType(15); case OpCode::ReportHit: + case OpCode::HitObject_FromRayQueryWithAttrs: if (FT->getNumParams() <= 3) return nullptr; return FT->getParamType(3); @@ -6285,34 +6158,12 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function 
*F) { case OpCode::ReservedA0: case OpCode::ReservedA1: case OpCode::ReservedA2: - case OpCode::ReservedB0: - case OpCode::ReservedB1: - case OpCode::ReservedB2: - case OpCode::ReservedB3: - case OpCode::ReservedB4: - case OpCode::ReservedB5: - case OpCode::ReservedB6: - case OpCode::ReservedB7: - case OpCode::ReservedB8: - case OpCode::ReservedB9: - case OpCode::ReservedB10: - case OpCode::ReservedB11: - case OpCode::ReservedB12: - case OpCode::ReservedB13: - case OpCode::ReservedB14: - case OpCode::ReservedB15: - case OpCode::ReservedB16: - case OpCode::ReservedB17: - case OpCode::ReservedB18: - case OpCode::ReservedB19: - case OpCode::ReservedB20: - case OpCode::ReservedB21: - case OpCode::ReservedB22: - case OpCode::ReservedB23: - case OpCode::ReservedB24: - case OpCode::ReservedB25: - case OpCode::ReservedB26: - case OpCode::ReservedB27: + case OpCode::HitObject_FromRayQuery: + case OpCode::HitObject_MakeMiss: + case OpCode::HitObject_MakeNop: + case OpCode::MaybeReorderThread: + case OpCode::HitObject_SetShaderTableIndex: + case OpCode::HitObject_LoadLocalRootTableConstant: case OpCode::ReservedB28: case OpCode::ReservedB29: case OpCode::ReservedB30: @@ -6364,6 +6215,13 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex: case OpCode::StartVertexLocation: case OpCode::StartInstanceLocation: + case OpCode::HitObject_RayFlags: + case OpCode::HitObject_GeometryIndex: + case OpCode::HitObject_InstanceIndex: + case OpCode::HitObject_InstanceID: + case OpCode::HitObject_PrimitiveIndex: + case OpCode::HitObject_HitKind: + case OpCode::HitObject_ShaderTableIndex: return IntegerType::get(Ctx, 32); case OpCode::CalculateLOD: case OpCode::DomainLocation: @@ -6390,6 +6248,14 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CandidateObjectRayDirection: case OpCode::RayQuery_CommittedObjectRayOrigin: case 
OpCode::RayQuery_CommittedObjectRayDirection: + case OpCode::HitObject_RayTMin: + case OpCode::HitObject_RayTCurrent: + case OpCode::HitObject_WorldRayOrigin: + case OpCode::HitObject_WorldRayDirection: + case OpCode::HitObject_ObjectRayOrigin: + case OpCode::HitObject_ObjectRayDirection: + case OpCode::HitObject_ObjectToWorld3x4: + case OpCode::HitObject_WorldToObject3x4: return Type::getFloatTy(Ctx); case OpCode::MakeDouble: case OpCode::SplitDouble: @@ -6400,6 +6266,9 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::RayQuery_CommittedTriangleFrontFace: case OpCode::IsHelperLane: case OpCode::QuadVote: + case OpCode::HitObject_IsMiss: + case OpCode::HitObject_IsHit: + case OpCode::HitObject_IsNop: return IntegerType::get(Ctx, 1); case OpCode::CBufferLoadLegacy: case OpCode::Sample: @@ -6417,7 +6286,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TextureGatherRaw: case OpCode::SampleCmpLevel: case OpCode::SampleCmpGrad: - case OpCode::SampleCmpBias: { + case OpCode::SampleCmpBias: + case OpCode::RawBufferVectorLoad: { StructType *ST = cast(Ty); return ST->getElementType(0); } @@ -6431,6 +6301,8 @@ Type *OP::GetHandleType() const { return m_pHandleType; } Type *OP::GetNodeHandleType() const { return m_pNodeHandleType; } +Type *OP::GetHitObjectType() const { return m_pHitObjectType; } + Type *OP::GetNodeRecordHandleType() const { return m_pNodeRecordHandleType; } Type *OP::GetResourcePropertiesType() const { @@ -6462,62 +6334,91 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { + if (!Ty->isStructTy()) + return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) return true; } - return false; + // Check for vector overload which isn't cached in m_pResRetType. 
+ StructType *ST = cast(Ty); + if (!ST->hasName() || ST->getNumElements() < 2 || + !ST->getElementType(0)->isVectorTy()) + return false; + return Ty == GetResRetType(ST->getElementType(0)); } Type *OP::GetResRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); - if (m_pResRetType[TypeSlot] == nullptr) { - string TypeName("dx.types.ResRet."); - TypeName += GetOverloadTypeName(TypeSlot); - Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, - pOverloadType, Type::getInt32Ty(m_Ctx)}; - m_pResRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + if (TypeSlot < TS_BasicCount) { + if (m_pResRetType[TypeSlot] == nullptr) { + SmallVector Storage; + StringRef TypeName = + (Twine("dx.types.ResRet.") + Twine(GetOverloadTypeName(TypeSlot))) + .toStringRef(Storage); + Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, + pOverloadType, Type::getInt32Ty(m_Ctx)}; + m_pResRetType[TypeSlot] = + GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } + return m_pResRetType[TypeSlot]; + } else if (TypeSlot == TS_Vector) { + SmallVector Storage; + VectorType *VecTy = cast(pOverloadType); + StringRef TypeName = + (Twine("dx.types.ResRet.v") + Twine(VecTy->getNumElements()) + + Twine(GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())))) + .toStringRef(Storage); + Type *FieldTypes[2] = {pOverloadType, Type::getInt32Ty(m_Ctx)}; + return GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); } - return m_pResRetType[TypeSlot]; + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; } Type *OP::GetCBufferRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); + if (TypeSlot >= TS_BasicCount) { + llvm_unreachable("Invalid overload for GetResRetType"); + return nullptr; + } + if (m_pCBufferRetType[TypeSlot] == nullptr) { DXASSERT(m_LowPrecisionMode != DXIL::LowPrecisionMode::Undefined, "m_LowPrecisionMode must be set before 
constructing type."); - string TypeName("dx.types.CBufRet."); - TypeName += GetOverloadTypeName(TypeSlot); + SmallVector Storage; + raw_svector_ostream OS(Storage); + OS << "dx.types.CBufRet."; + OS << GetOverloadTypeName(TypeSlot); Type *i64Ty = Type::getInt64Ty(pOverloadType->getContext()); Type *i16Ty = Type::getInt16Ty(pOverloadType->getContext()); if (pOverloadType->isDoubleTy() || pOverloadType == i64Ty) { Type *FieldTypes[2] = {pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else if (!UseMinPrecision() && (pOverloadType->isHalfTy() || pOverloadType == i16Ty)) { - TypeName += ".8"; // dx.types.CBufRet.fp16.8 for buffer of 8 halves + OS << ".8"; // dx.types.CBufRet.f16.8 for buffer of 8 halves Type *FieldTypes[8] = { pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, pOverloadType, }; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } else { Type *FieldTypes[4] = {pOverloadType, pOverloadType, pOverloadType, pOverloadType}; m_pCBufferRetType[TypeSlot] = - GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + GetOrCreateStructType(m_Ctx, FieldTypes, OS.str(), m_pModule); } } return m_pCBufferRetType[TypeSlot]; } -Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { +Type *OP::GetStructVectorType(unsigned numElements, Type *pOverloadType) { if (numElements == 4) { if (pOverloadType == Type::getInt32Ty(pOverloadType->getContext())) { return m_pFourI32Type; @@ -6529,6 +6430,10 @@ Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { return nullptr; } +StructType *OP::GetExtendedOverloadType(ArrayRef OverloadTypes) { + return StructType::get(m_Ctx, OverloadTypes); +} + 
//------------------------------------------------------------------------------ // // LLVM utility methods. diff --git a/lib/DXIL/DxilResource.cpp b/lib/DXIL/DxilResource.cpp index 3ab71030bb..0e6f1df877 100644 --- a/lib/DXIL/DxilResource.cpp +++ b/lib/DXIL/DxilResource.cpp @@ -25,8 +25,8 @@ namespace hlsl { DxilResource::DxilResource() : DxilResourceBase(DxilResourceBase::Class::Invalid), m_SampleCount(0), m_ElementStride(0), m_SamplerFeedbackType((DXIL::SamplerFeedbackType)0), - m_bGloballyCoherent(false), m_bHasCounter(false), m_bROV(false), - m_bHasAtomic64Use(false) {} + m_bGloballyCoherent(false), m_bReorderCoherent(false), + m_bHasCounter(false), m_bROV(false), m_bHasAtomic64Use(false) {} CompType DxilResource::GetCompType() const { return m_CompType; } @@ -74,6 +74,10 @@ bool DxilResource::IsGloballyCoherent() const { return m_bGloballyCoherent; } void DxilResource::SetGloballyCoherent(bool b) { m_bGloballyCoherent = b; } +bool DxilResource::IsReorderCoherent() const { return m_bReorderCoherent; } + +void DxilResource::SetReorderCoherent(bool b) { m_bReorderCoherent = b; } + bool DxilResource::HasCounter() const { return m_bHasCounter; } void DxilResource::SetHasCounter(bool b) { m_bHasCounter = b; } diff --git a/lib/DXIL/DxilResourceProperties.cpp b/lib/DXIL/DxilResourceProperties.cpp index 2d1bf95014..54ab24f36e 100644 --- a/lib/DXIL/DxilResourceProperties.cpp +++ b/lib/DXIL/DxilResourceProperties.cpp @@ -190,6 +190,7 @@ DxilResourceProperties loadPropsFromResourceBase(const DxilResourceBase *Res) { RP.Basic.IsUAV = true; RP.Basic.ResourceKind = (uint8_t)Res->GetKind(); RP.Basic.IsGloballyCoherent = UAV->IsGloballyCoherent(); + RP.Basic.IsReorderCoherent = UAV->IsReorderCoherent(); RP.Basic.SamplerCmpOrHasCounter = UAV->HasCounter(); RP.Basic.IsROV = UAV->IsROV(); SetResProperties(*UAV); @@ -234,6 +235,8 @@ DxilResourceProperties tryMergeProps(DxilResourceProperties curProps, prevProps.Basic.IsGloballyCoherent) { curProps.Basic.IsGloballyCoherent = 
prevProps.Basic.IsGloballyCoherent; } + if (curProps.Basic.IsReorderCoherent != prevProps.Basic.IsReorderCoherent) + curProps.Basic.IsReorderCoherent = prevProps.Basic.IsReorderCoherent; } if (curProps.Basic.ResourceKind == (uint8_t)DXIL::ResourceKind::CBuffer) { diff --git a/lib/DXIL/DxilShaderFlags.cpp b/lib/DXIL/DxilShaderFlags.cpp index 7d0799dc64..993038aaf1 100644 --- a/lib/DXIL/DxilShaderFlags.cpp +++ b/lib/DXIL/DxilShaderFlags.cpp @@ -637,6 +637,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F, hasViewID = true; break; case DXIL::OpCode::AllocateRayQuery: + case DXIL::OpCode::AllocateRayQuery2: case DXIL::OpCode::GeometryIndex: hasRaytracingTier1_1 = true; break; diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 757a0bc3ee..966c2e189c 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -426,35 +426,37 @@ GetHLSLResourceProperties(llvm::Type *Ty) { false, false, false)); if (name == "SamplerComparisonState") - return RetType( - true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, false, - false, /*cmp or counter*/ true)); + return RetType(true, MakeResourceProperties( + hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, + /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("AppendStructuredBuffer<")) - return RetType(true, MakeResourceProperties( - hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + return RetType(true, + MakeResourceProperties( + hlsl::DXIL::ResourceKind::StructuredBuffer, + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + /*UAV*/ true, /*ROV*/ false, + /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::RTAccelerationStructure, - false, false, false)); + 
/*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("ConstantBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, - false, false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("TextureBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, - false, false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (ConsumePrefix(name, "FeedbackTexture2D")) { hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid; @@ -464,7 +466,9 @@ GetHLSLResourceProperties(llvm::Type *Ty) { kind = hlsl::DXIL::ResourceKind::FeedbackTexture2D; if (name.startswith("<")) - return RetType(true, MakeResourceProperties(kind, false, false, false)); + return RetType(true, + MakeResourceProperties(kind, /*UAV*/ false, + /*ROV*/ false, /*Cmp*/ false)); return FalseRet; } @@ -475,63 +479,63 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name == "ByteAddressBuffer") return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::RawBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Buffer<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TypedBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("StructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (ConsumePrefix(name, "Texture")) { if (name.startswith("1D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture1D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("1DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture1DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if 
(name.startswith("2D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("3D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture3D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Cube<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TextureCube, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("CubeArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::TextureCubeArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMS<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2DMS, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMSArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DMSArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); return FalseRet; } } @@ -570,6 +574,9 @@ bool IsHLSLObjectType(llvm::Type *Ty) { if (IsHLSLNodeIOType(Ty)) return true; + + if (IsHLSLHitObjectType(Ty)) + return true; } return false; } @@ -587,6 +594,24 @@ bool IsHLSLRayQueryType(llvm::Type *Ty) { return false; } +llvm::Type *GetHLSLHitObjectType(llvm::Module *M) { + using namespace llvm; + StructType *HitObjectTy = M->getTypeByName("dx.types.HitObject"); + if (!HitObjectTy) + HitObjectTy = StructType::create({Type::getInt8PtrTy(M->getContext(), 0)}, + "dx.types.HitObject", false); + return HitObjectTy; +} + +bool IsHLSLHitObjectType(llvm::Type *Ty) { + llvm::StructType *ST = dyn_cast(Ty); + if (!ST) + return false; + if (!ST->hasName()) + return false; + return ST->getName() == "dx.types.HitObject"; +} + bool IsHLSLResourceDescType(llvm::Type *Ty) { if (llvm::StructType *ST = 
dyn_cast(Ty)) { if (!ST->hasName()) @@ -1390,5 +1415,18 @@ bool DeleteDeadAllocas(llvm::Function &F) { return Changed; } +// Retrieve dxil version in the given module. +// Where the module doesn't already have a Dxil module, +// it identifies and returns the version info from the metatdata. +// Returns false where none of that works, but that shouldn't happen much. +bool LoadDxilVersion(const Module *M, unsigned &Major, unsigned &Minor) { + if (M->HasDxilModule()) { + M->GetDxilModule().GetShaderModel()->GetDxilVersion(Major, Minor); + return true; + } + // No module, try metadata. + return DxilMDHelper::LoadDxilVersion(M, Major, Minor); +} + } // namespace dxilutil } // namespace hlsl diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 3daf880f6d..1ce7d0dfc0 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -1089,6 +1089,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, addDiagnosticArgs(Args, OPT_W_Group, OPT_W_value_Group, opts.Warnings); + opts.GenMetal = Args.hasFlag(OPT_metal, OPT_INVALID, false); + // SPIRV Change Starts #ifdef ENABLE_SPIRV_CODEGEN opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false); @@ -1313,6 +1315,21 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, #endif // ENABLE_SPIRV_CODEGEN // SPIRV Change Ends +#ifndef ENABLE_METAL_CODEGEN + if (opts.GenMetal) { + errors << "Metal CodeGen not available. " + "Please rebuild with Metal IR Converter installed."; + return 1; + } +#endif + + if (opts.GenMetal) { + if (!opts.AssemblyCode.empty() || opts.OutputObject.empty()) { + errors << "Disassembly of Metal IR not supported (yet)."; + return 1; + } + } + // Validation for DebugInfo here because spirv uses same DebugInfo opt, // and legacy wrappers will add EmbedDebug in this case, leading to this // failing if placed before spirv path sets DebugInfo to true. 
diff --git a/lib/DxilContainer/DxilContainerAssembler.cpp b/lib/DxilContainer/DxilContainerAssembler.cpp index 0b7f5dd467..48d8872733 100644 --- a/lib/DxilContainer/DxilContainerAssembler.cpp +++ b/lib/DxilContainer/DxilContainerAssembler.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/Cloning.h" #include #include // Needed for DxilPipelineStateValidation.h @@ -1056,6 +1057,9 @@ class DxilRDATWriter : public DxilPartWriter { if (pRes->IsGloballyCoherent()) info.Flags |= static_cast(RDAT::DxilResourceFlag::UAVGloballyCoherent); + if (pRes->IsReorderCoherent()) + info.Flags |= + static_cast(RDAT::DxilResourceFlag::UAVReorderCoherent); if (pRes->IsROV()) info.Flags |= static_cast( RDAT::DxilResourceFlag::UAVRasterizerOrderedView); @@ -1895,6 +1899,7 @@ void hlsl::SerializeDxilContainerForModule( DxilShaderHash *pShaderHashOut, AbstractMemoryStream *pReflectionStreamOut, AbstractMemoryStream *pRootSigStreamOut, void *pPrivateData, size_t PrivateDataSize) { + llvm::TimeTraceScope TimeScope("SerializeDxilContainer", StringRef("")); // TODO: add a flag to update the module and remove information that is not // part of DXIL proper and is used only to assemble the container. 
diff --git a/lib/DxilPIXPasses/CMakeLists.txt b/lib/DxilPIXPasses/CMakeLists.txt index c36d11d559..67e77f17cd 100644 --- a/lib/DxilPIXPasses/CMakeLists.txt +++ b/lib/DxilPIXPasses/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMDxilPIXPasses PixPassHelpers.cpp DxilPIXAddTidToAmplificationShaderPayload.cpp DxilPIXDXRInvocationsLog.cpp + DxilNonUniformResourceIndexInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp new file mode 100644 index 0000000000..a442bfabed --- /dev/null +++ b/lib/DxilPIXPasses/DxilNonUniformResourceIndexInstrumentation.cpp @@ -0,0 +1,173 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNonUniformResourceIndexInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to add instrumentation to determine missing usage of the // +// NonUniformResourceIndex qualifier when dynamically indexing resources. // +// Used by PIX. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "PixPassHelpers.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DxilPIXPasses/DxilPIXPasses.h" +#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace hlsl; + +class DxilNonUniformResourceIndexInstrumentation : public ModulePass { + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilNonUniformResourceIndexInstrumentation() : ModulePass(ID) {} + StringRef getPassName() const override { + return "DXIL NonUniformResourceIndex Instrumentation"; + } + bool runOnModule(Module &M) override; +}; + +bool DxilNonUniformResourceIndexInstrumentation::runOnModule(Module &M) { + // This pass adds instrumentation for incorrect NonUniformResourceIndex usage + + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext &Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + hlsl::DxilResource *PixUAVResource = nullptr; + + UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + + // Use WaveActiveAllEqual to check if a dynamic index is uniform + Function *WaveActiveAllEqualFunc = HlslOP->GetOpFunc( + DXIL::OpCode::WaveActiveAllEqual, Type::getInt32Ty(Ctx)); + Constant *WaveActiveAllEqualOpCode = + HlslOP->GetI32Const((int32_t)DXIL::OpCode::WaveActiveAllEqual); + + // Atomic operation to use for writing to the result uav resource + Function *AtomicOpFunc = + HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant *AtomicBinOpcode = + HlslOP->GetU32Const((uint32_t)OP::OpCode::AtomicBinOp); + Constant *AtomicOr = HlslOP->GetU32Const((uint32_t)DXIL::AtomicBinOpCode::Or); + + std::map FunctionToUAVHandle; + + // This is the main pass that will iterate through all of the resources that + // are dynamically indexed. 
If not already marked NonUniformResourceIndex, + // then insert WaveActiveAllEqual to determine if the index is uniform + // and finally write to a UAV resource with the result. + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&](bool IsNonUniformIndex, Instruction *CreateHandle, + Value *IndexOperand) { + if (IsNonUniformIndex) { + // The NonUniformResourceIndex qualifier was used, continue. + return true; + } + + if (!PixUAVResource) { + PixUAVResource = + PIXPassHelpers::CreateGlobalUAVResource(DM, 0, "PixUAVResource"); + } + + CallInst *PixUAVHandle = nullptr; + Function *F = CreateHandle->getParent()->getParent(); + + const auto FunctionToUAVHandleIter = FunctionToUAVHandle.lower_bound(F); + + if ((FunctionToUAVHandleIter != FunctionToUAVHandle.end()) && + (FunctionToUAVHandleIter->first == F)) { + PixUAVHandle = FunctionToUAVHandleIter->second; + } else { + IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt()); + + PixUAVHandle = PIXPassHelpers::CreateHandleForResource( + DM, Builder, PixUAVResource, "PixUAVHandle"); + + FunctionToUAVHandle.insert(FunctionToUAVHandleIter, + {F, PixUAVHandle}); + } + + IRBuilder<> Builder(CreateHandle); + + uint32_t InstructionNumber = 0; + if (!pix_dxil::PixDxilInstNum::FromInst(CreateHandle, + &InstructionNumber)) { + DXASSERT_NOMSG(false); + } + + // The output UAV is treated as a bit array where each bit corresponds + // to an instruction number. This determines what byte offset to write + // our result to based on the instruction number. 
+ const uint32_t InstructionNumByteOffset = + (InstructionNumber / 32u) * sizeof(uint32_t); + const uint32_t InstructionNumBitPosition = (InstructionNumber % 32u); + const uint32_t InstructionNumBitMask = 1u << InstructionNumBitPosition; + + Constant *UAVByteOffsetArg = + HlslOP->GetU32Const(InstructionNumByteOffset); + + CallInst *WaveActiveAllEqualCall = Builder.CreateCall( + WaveActiveAllEqualFunc, {WaveActiveAllEqualOpCode, IndexOperand}); + + // This takes the result of the WaveActiveAllEqual result and shifts + // it into the same bit position as the instruction number, followed + // by an xor to determine what to write to the UAV + Value *IsWaveEqual = + Builder.CreateZExt(WaveActiveAllEqualCall, Builder.getInt32Ty()); + Value *WaveEqualBitMask = + Builder.CreateShl(IsWaveEqual, InstructionNumBitPosition); + Value *FinalResult = + Builder.CreateXor(WaveEqualBitMask, InstructionNumBitMask); + + // Generate instructions to bitwise OR a UAV value corresponding + // to the instruction number and result of WaveActiveAllEqual. + // If WaveActiveAllEqual was false, we write a 1, otherwise a 0. 
+ Builder.CreateCall( + AtomicOpFunc, + { + AtomicBinOpcode, // i32, ; opcode + PixUAVHandle, // %dx.types.Handle, ; resource handle + AtomicOr, // i32, ; binary operation code : + // EXCHANGE, IADD, AND, OR, XOR + // IMIN, IMAX, UMIN, UMAX + UAVByteOffsetArg, // i32, ; coordinate c0: byte offset + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + FinalResult // i32); value + }, + "UAVInstructionNumberBitSet"); + return true; + }); + + const bool modified = (PixUAVResource != nullptr); + + if (modified) { + DM.ReEmitDxilResources(); + + if (OSOverride != nullptr) { + formatted_raw_ostream FOS(*OSOverride); + FOS << "\nFoundDynamicIndexingNoNuri\n"; + } + } + + return modified; +} + +char DxilNonUniformResourceIndexInstrumentation::ID = 0; + +ModulePass *llvm::createDxilNonUniformResourceIndexInstrumentationPass() { + return new DxilNonUniformResourceIndexInstrumentation(); +} + +INITIALIZE_PASS(DxilNonUniformResourceIndexInstrumentation, + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + false, false) diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp index 4f4cc7c620..1dddb6c0e6 100644 --- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp +++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp @@ -795,87 +795,6 @@ DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle, return ret; } -static bool CheckForDynamicIndexing(OP *HlslOP, LLVMContext &Ctx, - DxilModule &DM) { - bool FoundDynamicIndexing = false; - - for (llvm::Function &F : DM.GetModule()->functions()) { - if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { - if (F.hasName()) { - if (F.getName().find("createHandleForLib") != StringRef::npos) { - auto FunctionUses = F.uses(); - for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { - auto &FunctionUse = *FI++; - auto FunctionUser = FunctionUse.getUser(); - 
auto instruction = cast(FunctionUser); - Value *resourceLoad = - instruction->getOperand(kCreateHandleForLibResOpIdx); - if (auto *load = cast(resourceLoad)) { - auto *resOrGep = load->getOperand(0); - if (isa(resOrGep)) { - FoundDynamicIndexing = true; - break; - } - } - } - } - } - } - if (FoundDynamicIndexing) { - break; - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFn = - HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFn->user_begin(); - FI != CreateHandleFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = instruction->getOperand(kCreateHandleResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromBindingFn->user_begin(); - FI != CreateHandleFromBindingFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromBindingResIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - if (!FoundDynamicIndexing) { - auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( - DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); - for (auto FI = CreateHandleFromHeapFn->user_begin(); - FI != CreateHandleFromHeapFn->user_end();) { - auto *FunctionUser = *FI++; - auto instruction = cast(FunctionUser); - Value *index = - instruction->getOperand(kCreateHandleFromHeapHeapIndexOpIdx); - if (!isa(index)) { - FoundDynamicIndexing = true; - break; - } - } - } - - return FoundDynamicIndexing; -} - bool DxilShaderAccessTracking::runOnModule(Module &M) { // This pass adds instrumentation for shader access to resources @@ -887,7 +806,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { if (m_CheckForDynamicIndexing) { - bool 
FoundDynamicIndexing = CheckForDynamicIndexing(HlslOP, Ctx, DM); + bool FoundDynamicIndexing = false; + + PIXPassHelpers::ForEachDynamicallyIndexedResource( + DM, [&FoundDynamicIndexing](bool, Instruction *, Value *) { + FoundDynamicIndexing = true; + return false; + }); if (FoundDynamicIndexing) { if (OSOverride != nullptr) { @@ -980,13 +905,14 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) { case DXIL::OpCode::BufferUpdateCounter: readWrite = ShaderAccessFlags::Counter; break; + case DXIL::OpCode::HitObject_TraceRay: case DXIL::OpCode::TraceRay: { // Read of AccelerationStructure; doesn't match function attribute - auto res = GetResourceFromHandle(Call->getArgOperand(1), DM); - if (res.accessStyle == AccessStyle::None) { + auto Res = GetResourceFromHandle(Call->getArgOperand(1), DM); + if (Res.accessStyle == AccessStyle::None) { continue; } - if (EmitResourceAccess(DM, res, Call, HlslOP, Ctx, + if (EmitResourceAccess(DM, Res, Call, HlslOP, Ctx, ShaderAccessFlags::Read)) { Modified = true; } diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index dfb4b3aa83..c7c99cf763 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -199,6 +199,18 @@ constexpr uint32_t toolsUAVRegister = 0; template void ExtendRootSig(RootSigDesc &rootSigDesc) { auto *existingParams = rootSigDesc.pParameters; + for (uint32_t i = 0; i < rootSigDesc.NumParameters; ++i) { + if (rootSigDesc.pParameters[i].ParameterType == + DxilRootParameterType::UAV) { + if (rootSigDesc.pParameters[i].Descriptor.RegisterSpace == + toolsRegisterSpace && + rootSigDesc.pParameters[i].Descriptor.ShaderRegister == + toolsUAVRegister) { + // Already added + return; + } + } + } auto *newParams = new RootParameterDesc[rootSigDesc.NumParameters + 1]; if (existingParams != nullptr) { memcpy(newParams, existingParams, @@ -312,6 +324,7 @@ hlsl::DxilResource *CreateGlobalUAVResource(hlsl::DxilModule &DM, (unsigned int)-2); // This is 
the reserved-for-tools register space pUAV->SetSampleCount(0); // This is what compiler generates for a raw UAV pUAV->SetGloballyCoherent(false); + pUAV->SetReorderCoherent(false); pUAV->SetHasCounter(false); pUAV->SetCompType( CompType::getInvalid()); // This is what compiler generates for a raw UAV @@ -500,6 +513,90 @@ unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, } } +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function &Visitor) { + OP *HlslOP = DM.GetOP(); + LLVMContext &Ctx = DM.GetModule()->getContext(); + + for (llvm::Function &F : DM.GetModule()->functions()) { + if (F.isDeclaration() && !F.use_empty() && OP::IsDxilOpFunc(&F)) { + if (F.hasName()) { + if (F.getName().find("createHandleForLib") != StringRef::npos) { + auto FunctionUses = F.uses(); + for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { + auto &FunctionUse = *FI++; + auto FunctionUser = FunctionUse.getUser(); + auto instruction = cast(FunctionUser); + Value *resourceLoad = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleForLibResOpIdx); + if (auto *load = cast(resourceLoad)) { + auto *resOrGep = load->getOperand(0); + if (auto *gep = dyn_cast(resOrGep)) { + if (!Visitor(DxilMDHelper::IsMarkedNonUniform(gep), load, + gep->getOperand(2))) { + return; + } + } + } + } + } + } + } + } + + auto CreateHandleFn = + HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFn->user_begin(); + FI != CreateHandleFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = + instruction->getOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandle createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromBindingFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx)); + for (auto 
FI = CreateHandleFromBindingFn->user_begin(); + FI != CreateHandleFromBindingFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromBindingResIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromBinding createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } + + auto CreateHandleFromHeapFn = HlslOP->GetOpFunc( + DXIL::OpCode::CreateHandleFromHeap, Type::getVoidTy(Ctx)); + for (auto FI = CreateHandleFromHeapFn->user_begin(); + FI != CreateHandleFromHeapFn->user_end();) { + auto *FunctionUser = *FI++; + auto instruction = cast(FunctionUser); + Value *index = instruction->getOperand( + DXIL::OperandIndex::kCreateHandleFromHeapHeapIndexOpIdx); + if (!isa(index)) { + const DxilInst_CreateHandleFromHeap createHandle(instruction); + if (!Visitor(createHandle.get_nonUniformIndex_val(), instruction, + index)) { + return; + } + } + } +} + #ifdef PIX_DEBUG_DUMP_HELPER static int g_logIndent = 0; diff --git a/lib/DxilPIXPasses/PixPassHelpers.h b/lib/DxilPIXPasses/PixPassHelpers.h index 4cd0e1a549..d7b0b40af8 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.h +++ b/lib/DxilPIXPasses/PixPassHelpers.h @@ -9,6 +9,7 @@ #pragma once +#include #include #include "dxc/DXIL/DxilModule.h" @@ -16,7 +17,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -//#define PIX_DEBUG_DUMP_HELPER +// #define PIX_DEBUG_DUMP_HELPER #ifdef PIX_DEBUG_DUMP_HELPER #include "dxc/Support/Global.h" #endif @@ -82,4 +83,8 @@ void ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction( llvm::Instruction *Instr, llvm::Value *newValue, llvm::Type *newType); unsigned int FindOrAddSV_Position(hlsl::DxilModule &DM, unsigned UpStreamSVPosRow); +void ForEachDynamicallyIndexedResource( + hlsl::DxilModule &DM, + const std::function + &Visitor); } // namespace PIXPassHelpers diff --git 
a/lib/DxilValidation/DxilContainerValidation.cpp b/lib/DxilValidation/DxilContainerValidation.cpp index 890e90e354..89e23767fe 100644 --- a/lib/DxilValidation/DxilContainerValidation.cpp +++ b/lib/DxilValidation/DxilContainerValidation.cpp @@ -337,7 +337,7 @@ void PSVContentVerifier::VerifySignatureElement( PSVSignatureElement PSVSE(StrTab, IndexTab, PSVSE0); if (SE.IsArbitrary()) - Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()); + Mismatch |= strcmp(PSVSE.GetSemanticName(), SE.GetName()) != 0; else Mismatch |= PSVSE0->SemanticKind != static_cast(SE.GetKind()); @@ -494,7 +494,8 @@ void PSVContentVerifier::Verify(unsigned ValMajor, unsigned ValMinor, std::to_string(ShaderStage)); return; } - if (PSV1->UsesViewID != DM.m_ShaderFlags.GetViewID()) + bool ViewIDUsed = PSV1->UsesViewID != 0; + if (ViewIDUsed != DM.m_ShaderFlags.GetViewID()) EmitMismatchError("UsesViewID", std::to_string(PSV1->UsesViewID), std::to_string(DM.m_ShaderFlags.GetViewID())); diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 0a2001a745..00a6b9ae14 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -65,8 +65,8 @@ using std::vector; namespace hlsl { // PrintDiagnosticContext methods. 
-PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &printer) - : m_Printer(printer), m_errorsFound(false), m_warningsFound(false) {} +PrintDiagnosticContext::PrintDiagnosticContext(DiagnosticPrinter &Printer) + : m_Printer(Printer), m_errorsFound(false), m_warningsFound(false) {} bool PrintDiagnosticContext::HasErrors() const { return m_errorsFound; } bool PrintDiagnosticContext::HasWarnings() const { return m_warningsFound; } @@ -97,68 +97,68 @@ struct PSExecutionInfo { }; static unsigned ValidateSignatureRowCol(Instruction *I, - DxilSignatureElement &SE, Value *rowVal, - Value *colVal, EntryStatus &Status, + DxilSignatureElement &SE, Value *RowVal, + Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (ConstantInt *constRow = dyn_cast(rowVal)) { - unsigned row = constRow->getLimitedValue(); - if (row >= SE.GetRows()) { - std::string range = std::string("0~") + std::to_string(SE.GetRows()); + if (ConstantInt *ConstRow = dyn_cast(RowVal)) { + unsigned Row = ConstRow->getLimitedValue(); + if (Row >= SE.GetRows()) { + std::string Range = std::string("0~") + std::to_string(SE.GetRows()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Row", range, std::to_string(row)}); + {"Row", Range, std::to_string(Row)}); } } - if (!isa(colVal)) { - // col must be const + if (!isa(ColVal)) { + // Col must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"Col", "LoadInput/StoreOutput"}); return 0; } - unsigned col = cast(colVal)->getLimitedValue(); + unsigned Col = cast(ColVal)->getLimitedValue(); - if (col > SE.GetCols()) { - std::string range = std::string("0~") + std::to_string(SE.GetCols()); + if (Col > SE.GetCols()) { + std::string Range = std::string("0~") + std::to_string(SE.GetCols()); ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOperandRange, - {"Col", range, std::to_string(col)}); + {"Col", Range, std::to_string(Col)}); } else { if (SE.IsOutput()) - Status.outputCols[SE.GetID()] |= 1 << 
col; + Status.outputCols[SE.GetID()] |= 1 << Col; if (SE.IsPatchConstOrPrim()) - Status.patchConstOrPrimCols[SE.GetID()] |= 1 << col; + Status.patchConstOrPrimCols[SE.GetID()] |= 1 << Col; } - return col; + return Col; } static DxilSignatureElement * -ValidateSignatureAccess(Instruction *I, DxilSignature &sig, Value *sigID, - Value *rowVal, Value *colVal, EntryStatus &Status, +ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, + Value *RowVal, Value *ColVal, EntryStatus &Status, ValidationContext &ValCtx) { - if (!isa(sigID)) { + if (!isa(SigId)) { // inputID must be const ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, {"SignatureID", "LoadInput/StoreOutput"}); return nullptr; } - unsigned SEIdx = cast(sigID)->getLimitedValue(); - if (sig.GetElements().size() <= SEIdx) { + unsigned SEIdx = cast(SigId)->getLimitedValue(); + if (Sig.GetElements().size() <= SEIdx) { ValCtx.EmitInstrError(I, ValidationRule::InstrOpConstRange); return nullptr; } - DxilSignatureElement &SE = sig.GetElement(SEIdx); - bool isOutput = sig.IsOutput(); + DxilSignatureElement &SE = Sig.GetElement(SEIdx); + bool IsOutput = Sig.IsOutput(); - unsigned col = ValidateSignatureRowCol(I, SE, rowVal, colVal, Status, ValCtx); + unsigned Col = ValidateSignatureRowCol(I, SE, RowVal, ColVal, Status, ValCtx); - if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { - unsigned mask = Status.OutputPositionMask[SE.GetOutputStream()]; - mask |= 1 << col; + if (IsOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) { + unsigned Mask = Status.OutputPositionMask[SE.GetOutputStream()]; + Mask |= 1 << Col; if (SE.GetOutputStream() < DXIL::kNumOutputStreams) - Status.OutputPositionMask[SE.GetOutputStream()] = mask; + Status.OutputPositionMask[SE.GetOutputStream()] = Mask; } return &SE; } @@ -183,9 +183,9 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, return RP; } -static DXIL::SamplerKind GetSamplerKind(Value 
*samplerHandle, +static DXIL::SamplerKind GetSamplerKind(Value *SamplerHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(samplerHandle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(SamplerHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::Sampler) { // must be sampler. @@ -200,14 +200,14 @@ static DXIL::SamplerKind GetSamplerKind(Value *samplerHandle, } static DXIL::ResourceKind -GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, +GetResourceKindAndCompTy(Value *Handle, DXIL::ComponentType &CompTy, DXIL::ResourceClass &ResClass, ValidationContext &ValCtx) { CompTy = DXIL::ComponentType::Invalid; ResClass = DXIL::ResourceClass::Invalid; // TODO: validate ROV is used only in PS. - DxilResourceProperties RP = GetResourceFromHandle(handle, ValCtx); + DxilResourceProperties RP = GetResourceFromHandle(Handle, ValCtx); ResClass = RP.getResourceClass(); switch (ResClass) { @@ -230,19 +230,19 @@ GetResourceKindAndCompTy(Value *handle, DXIL::ComponentType &CompTy, return RP.getResourceKind(); } -DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, - std::deque &offsets) { +DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &TypeSys, + std::deque &Offsets) { unsigned CurIdx = 1; - unsigned LastIdx = offsets.size() - 1; + unsigned LastIdx = Offsets.size() - 1; DxilStructAnnotation *StructAnnot = nullptr; - for (; CurIdx < offsets.size(); ++CurIdx) { + for (; CurIdx < Offsets.size(); ++CurIdx) { if (const StructType *EltST = dyn_cast(Ty)) { - if (DxilStructAnnotation *EltAnnot = typeSys.GetStructAnnotation(EltST)) { + if (DxilStructAnnotation *EltAnnot = TypeSys.GetStructAnnotation(EltST)) { StructAnnot = EltAnnot; - Ty = EltST->getElementType(offsets[CurIdx]); + Ty = EltST->getElementType(Offsets[CurIdx]); if (CurIdx == LastIdx) { - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } 
else { return nullptr; @@ -252,16 +252,16 @@ DxilFieldAnnotation *GetFieldAnnotation(Type *Ty, DxilTypeSystem &typeSys, StructAnnot = nullptr; } else { if (StructAnnot) - return &StructAnnot->GetFieldAnnotation(offsets[CurIdx]); + return &StructAnnot->GetFieldAnnotation(Offsets[CurIdx]); } } return nullptr; } -DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { - auto it = ResPropMap.find(resVal); - if (it != ResPropMap.end()) { - return it->second; +DxilResourceProperties ValidationContext::GetResourceFromVal(Value *ResVal) { + auto It = ResPropMap.find(ResVal); + if (It != ResPropMap.end()) { + return It->second; } else { DxilResourceProperties RP; return RP; @@ -269,34 +269,34 @@ DxilResourceProperties ValidationContext::GetResourceFromVal(Value *resVal) { } struct ResRetUsage { - bool x; - bool y; - bool z; - bool w; - bool status; - ResRetUsage() : x(false), y(false), z(false), w(false), status(false) {} + bool X; + bool Y; + bool Z; + bool W; + bool Status; + ResRetUsage() : X(false), Y(false), Z(false), W(false), Status(false) {} }; -static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, +static void CollectGetDimResRetUsage(ResRetUsage &Usage, Instruction *ResRet, ValidationContext &ValCtx) { for (User *U : ResRet->users()) { if (ExtractValueInst *EVI = dyn_cast(U)) { - for (unsigned idx : EVI->getIndices()) { - switch (idx) { + for (unsigned Idx : EVI->getIndices()) { + switch (Idx) { case 0: - usage.x = true; + Usage.X = true; break; case 1: - usage.y = true; + Usage.Y = true; break; case 2: - usage.z = true; + Usage.Z = true; break; case 3: - usage.w = true; + Usage.W = true; break; case DXIL::kResRetStatusIndex: - usage.status = true; + Usage.Status = true; break; default: // Emit index out of bound. 
@@ -306,7 +306,7 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } } else if (PHINode *PHI = dyn_cast(U)) { - CollectGetDimResRetUsage(usage, PHI, ValCtx); + CollectGetDimResRetUsage(Usage, PHI, ValCtx); } else { Instruction *User = cast(U); ValCtx.EmitInstrError(User, ValidationRule::InstrDxilStructUser); @@ -314,18 +314,18 @@ static void CollectGetDimResRetUsage(ResRetUsage &usage, Instruction *ResRet, } } -static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef coords, +static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { - const unsigned kMaxNumCoords = 4; - unsigned numCoords = DxilResource::GetNumCoords(resKind); - for (unsigned i = 0; i < kMaxNumCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + const unsigned KMaxNumCoords = 4; + unsigned NumCoords = DxilResource::GetNumCoords(ResKind); + for (unsigned I = 0; I < KMaxNumCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -334,18 +334,18 @@ static void ValidateResourceCoord(CallInst *CI, DXIL::ResourceKind resKind, } static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, - DXIL::ResourceKind resKind, - ArrayRef coords, + DXIL::ResourceKind ResKind, + ArrayRef Coords, ValidationContext &ValCtx) { const unsigned kMaxNumDimCoords = 3; - unsigned numCoords = DxilResource::GetNumDimensionsForCalcLOD(resKind); - for (unsigned i = 0; i < kMaxNumDimCoords; i++) { - if (i < numCoords) { - if (isa(coords[i])) { + unsigned NumCoords = DxilResource::GetNumDimensionsForCalcLOD(ResKind); + for (unsigned I = 0; I < kMaxNumDimCoords; I++) { + if (I < NumCoords) { + if (isa(Coords[I])) { ValCtx.EmitInstrError(CI, 
ValidationRule::InstrResourceCoordinateMiss); } } else { - if (!isa(coords[i])) { + if (!isa(Coords[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceCoordinateTooMany); } @@ -353,21 +353,21 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, } } -static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, - ArrayRef offsets, +static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind ResKind, + ArrayRef Offsets, ValidationContext &ValCtx) { const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - unsigned numOffsets = DxilResource::GetNumOffsets(resKind); - bool hasOffset = !isa(offsets[0]); + unsigned NumOffsets = DxilResource::GetNumOffsets(ResKind); + bool HasOffset = !isa(Offsets[0]); - auto validateOffset = [&](Value *offset) { + auto ValidateOffset = [&](Value *Offset) { // 6.7 Advanced Textures allow programmable offsets if (pSM->IsSM67Plus()) return; - if (ConstantInt *cOffset = dyn_cast(offset)) { - int offset = cOffset->getValue().getSExtValue(); - if (offset > 7 || offset < -8) { + if (ConstantInt *cOffset = dyn_cast(Offset)) { + int Offset = cOffset->getValue().getSExtValue(); + if (Offset > 7 || Offset < -8) { ValCtx.EmitInstrError(CI, ValidationRule::InstrTextureOffset); } } else { @@ -375,20 +375,20 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind, } }; - if (hasOffset) { - validateOffset(offsets[0]); + if (HasOffset) { + ValidateOffset(Offsets[0]); } - for (unsigned i = 1; i < offsets.size(); i++) { - if (i < numOffsets) { - if (hasOffset) { - if (isa(offsets[i])) + for (unsigned I = 1; I < Offsets.size(); I++) { + if (I < NumOffsets) { + if (HasOffset) { + if (isa(Offsets[I])) ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); else - validateOffset(offsets[i]); + ValidateOffset(Offsets[I]); } } else { - if (!isa(offsets[i])) { + if (!isa(Offsets[I])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } @@ -405,53 +405,53 
@@ static void ValidateDerivativeOp(CallInst *CI, ValidationContext &ValCtx) { {"Derivatives in CS/MS/AS", "Shader Model 6.6+"}); } -static void ValidateSampleInst(CallInst *CI, Value *srvHandle, - Value *samplerHandle, ArrayRef coords, - ArrayRef offsets, bool IsSampleC, +static void ValidateSampleInst(CallInst *CI, Value *SrvHandle, + Value *SamplerHandle, ArrayRef Coords, + ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); - bool isSampleCompTy = compTy == DXIL::ComponentType::F32; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF32; - isSampleCompTy |= compTy == DXIL::ComponentType::F16; - isSampleCompTy |= compTy == DXIL::ComponentType::SNormF16; - isSampleCompTy |= compTy == DXIL::ComponentType::UNormF16; + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); + bool IsSampleCompTy = CompTy == DXIL::ComponentType::F32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::F16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::SNormF16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::UNormF16; const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); if 
(pSM->IsSM67Plus() && !IsSampleC) { - isSampleCompTy |= compTy == DXIL::ComponentType::I16; - isSampleCompTy |= compTy == DXIL::ComponentType::U16; - isSampleCompTy |= compTy == DXIL::ComponentType::I32; - isSampleCompTy |= compTy == DXIL::ComponentType::U32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U16; + IsSampleCompTy |= CompTy == DXIL::ComponentType::I32; + IsSampleCompTy |= CompTy == DXIL::ComponentType::U32; } - if (!isSampleCompTy) { + if (!IsSampleCompTy) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleCompType); } - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); } - ValidationRule rule = ValidationRule::InstrResourceKindForSample; + ValidationRule Rule = ValidationRule::InstrResourceKindForSample; if (IsSampleC) { - rule = ValidationRule::InstrResourceKindForSampleC; + Rule = ValidationRule::InstrResourceKindForSampleC; } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -461,64 +461,64 @@ static void ValidateSampleInst(CallInst *CI, Value *srvHandle, break; case DXIL::ResourceKind::Texture3D: if (IsSampleC) { - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); } break; default: - ValCtx.EmitInstrError(CI, rule); + ValCtx.EmitInstrError(CI, Rule); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- ValidateResourceOffset(CI, resKind, offsets, ValCtx); + ValidateResourceOffset(CI, ResKind, Offsets, ValCtx); } -static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, - ArrayRef coords, ArrayRef offsets, +static void ValidateGather(CallInst *CI, Value *SrvHandle, Value *SamplerHandle, + ArrayRef Coords, ArrayRef Offsets, bool IsSampleC, ValidationContext &ValCtx) { if (!IsSampleC) { - if (GetSamplerKind(samplerHandle, ValCtx) != DXIL::SamplerKind::Default) { + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Default) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSample); } } else { - if (GetSamplerKind(samplerHandle, ValCtx) != + if (GetSamplerKind(SamplerHandle, ValCtx) != DXIL::SamplerKind::Comparison) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForSampleC); } } - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(srvHandle, compTy, resClass, ValCtx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(SrvHandle, CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource kind. - ValidateResourceCoord(CI, resKind, coords, ValCtx); + ValidateResourceCoord(CI, ResKind, Coords, ValCtx); // Offset match resource kind. 
- switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture2D: case DXIL::ResourceKind::Texture2DArray: { - bool hasOffset = !isa(offsets[0]); - if (hasOffset) { - if (isa(offsets[1])) { + bool HasOffset = !isa(Offsets[0]); + if (HasOffset) { + if (isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetMiss); } } } break; case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: { - if (!isa(offsets[0])) { + if (!isa(Offsets[0])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } - if (!isa(offsets[1])) { + if (!isa(Offsets[1])) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceOffsetTooMany); } } break; @@ -529,21 +529,21 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle, } } -static unsigned StoreValueToMask(ArrayRef vals) { - unsigned mask = 0; - for (unsigned i = 0; i < 4; i++) { - if (!isa(vals[i])) { - mask |= 1 << i; +static unsigned StoreValueToMask(ArrayRef Vals) { + unsigned Mask = 0; + for (unsigned I = 0; I < 4; I++) { + if (!isa(Vals[I])) { + Mask |= 1 << I; } } - return mask; + return Mask; } -static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { - DxilResourceProperties RP = GetResourceFromHandle(cbHandle, ValCtx); +static int GetCBufSize(Value *CbHandle, ValidationContext &ValCtx) { + DxilResourceProperties RP = GetResourceFromHandle(CbHandle, ValCtx); if (RP.getResourceClass() != DXIL::ResourceClass::CBuffer) { - ValCtx.EmitInstrError(cast(cbHandle), + ValCtx.EmitInstrError(cast(CbHandle), ValidationRule::InstrCBufferClassForCBufferHandle); return -1; } @@ -554,7 +554,7 @@ static int GetCBufSize(Value *cbHandle, ValidationContext &ValCtx) { // Make sure none of the handle arguments are undef / zero-initializer, // Also, do not accept any resource handles with invalid dxil resource // properties -void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, +void 
ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { for (Value *op : CI->operands()) { @@ -563,13 +563,13 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, const Type *pNodeRecordHandleTy = ValCtx.DxilMod.GetOP()->GetNodeRecordHandleType(); - const Type *argTy = op->getType(); - if (argTy == pNodeHandleTy || argTy == pNodeRecordHandleTy || - argTy == pHandleTy) { + const Type *ArgTy = op->getType(); + if (ArgTy == pNodeHandleTy || ArgTy == pNodeRecordHandleTy || + ArgTy == pHandleTy) { if (isa(op) || isa(op)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); - } else if (argTy == pHandleTy) { + } else if (ArgTy == pHandleTy) { // GetResourceFromHandle will emit an error on an invalid handle GetResourceFromHandle(op, ValCtx); } @@ -577,10 +577,10 @@ void ValidateHandleArgsForInstruction(CallInst *CI, DXIL::OpCode opcode, } } -void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, +void ValidateHandleArgs(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // TODO: add case DXIL::OpCode::IndexNodeRecordHandle: case DXIL::OpCode::AnnotateHandle: @@ -591,12 +591,12 @@ void ValidateHandleArgs(CallInst *CI, DXIL::OpCode opcode, break; default: - ValidateHandleArgsForInstruction(CI, opcode, ValCtx); + ValidateHandleArgsForInstruction(CI, Opcode, ValCtx); break; } } -static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { +static unsigned GetNumVertices(DXIL::InputPrimitive InputPrimitive) { const unsigned InputPrimitiveVertexTab[] = { 0, // Undefined = 0, 1, // Point = 1, @@ -641,26 +641,26 @@ static unsigned GetNumVertices(DXIL::InputPrimitive inputPrimitive) { 0, // LastEntry, }; - unsigned primitiveIdx = static_cast(inputPrimitive); - return InputPrimitiveVertexTab[primitiveIdx]; + unsigned PrimitiveIdx = static_cast(InputPrimitive); + return InputPrimitiveVertexTab[PrimitiveIdx]; } -static void 
ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { Function *F = CI->getParent()->getParent(); DxilModule &DM = ValCtx.DxilMod; - bool bIsPatchConstantFunc = false; + bool IsPatchConstantFunc = false; if (!DM.HasDxilEntryProps(F)) { - auto it = ValCtx.PatchConstantFuncMap.find(F); - if (it == ValCtx.PatchConstantFuncMap.end()) { + auto It = ValCtx.PatchConstantFuncMap.find(F); + if (It == ValCtx.PatchConstantFuncMap.end()) { // Missing entry props. ValCtx.EmitInstrError(CI, ValidationRule::InstrSignatureOperationNotInEntry); return; } // Use hull entry instead of patch constant function. - F = it->second.front(); - bIsPatchConstantFunc = true; + F = It->second.front(); + IsPatchConstantFunc = true; } if (!ValCtx.HasEntryStatus(F)) { return; @@ -668,67 +668,67 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); - DxilFunctionProps &props = EntryProps.props; + DxilFunctionProps &Props = EntryProps.props; DxilEntrySignature &S = EntryProps.sig; - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::LoadInput: { - Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); - ValidateSignatureAccess(CI, inputSig, inputID, row, col, Status, ValCtx); - - // Check vertexID in ps/vs. and none array input. 
- Value *vertexID = + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + ValidateSignatureAccess(CI, InputSig, InputId, Row, Col, Status, ValCtx); + + // Check VertexId in ps/vs. and none array input. + Value *VertexId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputVertexIDOpIdx); - bool usedVertexID = vertexID && !isa(vertexID); - if (props.IsVS() || props.IsPS()) { - if (usedVertexID) { - // use vertexID in VS/PS input. + bool UsedVertexId = VertexId && !isa(VertexId); + if (Props.IsVS() || Props.IsPS()) { + if (UsedVertexId) { + // Use VertexId in VS/PS input. ValCtx.EmitInstrError(CI, ValidationRule::SmOperand); return; } } else { - if (ConstantInt *cVertexID = dyn_cast(vertexID)) { - int immVertexID = cVertexID->getValue().getLimitedValue(); - if (cVertexID->getValue().isNegative()) { - immVertexID = cVertexID->getValue().getSExtValue(); + if (ConstantInt *cVertexId = dyn_cast(VertexId)) { + int ImmVertexId = cVertexId->getValue().getLimitedValue(); + if (cVertexId->getValue().isNegative()) { + ImmVertexId = cVertexId->getValue().getSExtValue(); } - const int low = 0; - int high = 0; - if (props.IsGS()) { - DXIL::InputPrimitive inputPrimitive = - props.ShaderProps.GS.inputPrimitive; - high = GetNumVertices(inputPrimitive); - } else if (props.IsDS()) { - high = props.ShaderProps.DS.inputControlPoints; - } else if (props.IsHS()) { - high = props.ShaderProps.HS.inputControlPoints; + const int Low = 0; + int High = 0; + if (Props.IsGS()) { + DXIL::InputPrimitive InputPrimitive = + Props.ShaderProps.GS.inputPrimitive; + High = GetNumVertices(InputPrimitive); + } else if (Props.IsDS()) { + High = Props.ShaderProps.DS.inputControlPoints; + } else if (Props.IsHS()) { + High = Props.ShaderProps.HS.inputControlPoints; } else { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadInput", "VS/HS/DS/GS/PS"}); } - if (immVertexID < low || immVertexID >= high) { - std::string range = std::to_string(low) + "~" + std::to_string(high); + if (ImmVertexId < Low || ImmVertexId >= High) { + std::string Range = std::to_string(Low) + "~" + std::to_string(High); ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"VertexID", range, std::to_string(immVertexID)}); + {"VertexID", Range, std::to_string(ImmVertexId)}); } } } } break; case DXIL::OpCode::DomainLocation: { - Value *colValue = + Value *ColValue = CI->getArgOperand(DXIL::OperandIndex::kDomainLocationColOpIdx); - if (!isa(colValue)) { - // col must be const + if (!isa(ColValue)) { + // Col must be const ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Col", "DomainLocation"}); } else { - unsigned col = cast(colValue)->getLimitedValue(); - if (col >= Status.domainLocSize) { + unsigned Col = cast(ColValue)->getLimitedValue(); + if (Col >= Status.domainLocSize) { ValCtx.EmitInstrError(CI, ValidationRule::SmDomainLocationIdxOOB); } } @@ -736,60 +736,60 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::StoreOutput: case DXIL::OpCode::StoreVertexOutput: case DXIL::OpCode::StorePrimitiveOutput: { - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = opcode == DXIL::OpCode::StorePrimitiveOutput + DxilSignature &OutputSig = Opcode == DXIL::OpCode::StorePrimitiveOutput ? 
S.PatchConstOrPrimSignature : S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::OutputControlPointID: { // Only used in hull shader. - Function *func = CI->getParent()->getParent(); + Function *Func = CI->getParent()->getParent(); // Make sure this is inside hs shader entry function. - if (!(props.IsHS() && F == func)) { + if (!(Props.IsHS() && F == Func)) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"OutputControlPointID", "hull function"}); } } break; case DXIL::OpCode::LoadOutputControlPoint: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (ValCtx.entryFuncCallSet.count(func) > 0) { + Function *Func = CI->getParent()->getParent(); + if (ValCtx.entryFuncCallSet.count(Func) > 0) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"LoadOutputControlPoint", "PatchConstant function"}); } - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.OutputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, ValCtx); + DxilSignature &OutputSig = S.OutputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } break; case DXIL::OpCode::StorePatchConstant: { // Only used in patch constant function. 
- Function *func = CI->getParent()->getParent(); - if (!bIsPatchConstantFunc) { + Function *Func = CI->getParent()->getParent(); + if (!IsPatchConstantFunc) { ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcodeInInvalidFunction, {"StorePatchConstant", "PatchConstant function"}); } else { - auto &hullShaders = ValCtx.PatchConstantFuncMap[func]; - for (Function *F : hullShaders) { + auto &HullShaders = ValCtx.PatchConstantFuncMap[Func]; + for (Function *F : HullShaders) { EntryStatus &Status = ValCtx.GetEntryStatus(F); DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); DxilEntrySignature &S = EntryProps.sig; - Value *outputID = + Value *OutputId = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputIDOpIdx); - DxilSignature &outputSig = S.PatchConstOrPrimSignature; - Value *row = + DxilSignature &OutputSig = S.PatchConstOrPrimSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputRowOpIdx); - Value *col = + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kStoreOutputColOpIdx); - ValidateSignatureAccess(CI, outputSig, outputID, row, col, Status, + ValidateSignatureAccess(CI, OutputSig, OutputId, Row, Col, Status, ValCtx); } } @@ -807,12 +807,12 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::EvalSampleIndex: case DXIL::OpCode::EvalSnapped: { // Eval* share same operand index with load input. 
- Value *inputID = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + Value *InputId = CI->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, inputID, row, col, Status, ValCtx); + CI, InputSig, InputId, Row, Col, Status, ValCtx); if (pSE) { switch (pSE->GetInterpolationMode()->GetKind()) { case DXIL::InterpolationMode::Linear: @@ -836,11 +836,11 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::AttributeAtVertex: { Value *Attribute = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); - DxilSignature &inputSig = S.InputSignature; - Value *row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); - Value *col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); + DxilSignature &InputSig = S.InputSignature; + Value *Row = CI->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); + Value *Col = CI->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); DxilSignatureElement *pSE = ValidateSignatureAccess( - CI, inputSig, Attribute, row, col, Status, ValCtx); + CI, InputSig, Attribute, Row, Col, Status, ValCtx); if (pSE && pSE->GetInterpolationMode()->GetKind() != hlsl::InterpolationMode::Kind::Constant) { ValCtx.EmitInstrFormatError( @@ -851,35 +851,35 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::OpCode::CutStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::EmitStream: { - if (props.IsGS()) { - auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < 
_countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + if (Props.IsGS()) { + auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - Value *streamID = + Value *StreamId = CI->getArgOperand(DXIL::OperandIndex::kStreamEmitCutIDOpIdx); - if (ConstantInt *cStreamID = dyn_cast(streamID)) { - int immStreamID = cStreamID->getValue().getLimitedValue(); - if (cStreamID->getValue().isNegative() || immStreamID >= 4) { + if (ConstantInt *cStreamId = dyn_cast(StreamId)) { + int ImmStreamId = cStreamId->getValue().getLimitedValue(); + if (cStreamId->getValue().isNegative() || ImmStreamId >= 4) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", "0~4", std::to_string(immStreamID)}); + {"StreamID", "0~4", std::to_string(ImmStreamId)}); } else { - unsigned immMask = 1 << immStreamID; - if ((streamMask & immMask) == 0) { - std::string range; - for (unsigned i = 0; i < 4; i++) { - if (streamMask & (1 << i)) { - range += std::to_string(i) + " "; + unsigned ImmMask = 1 << ImmStreamId; + if ((StreamMask & ImmMask) == 0) { + std::string Range; + for (unsigned I = 0; I < 4; I++) { + if (StreamMask & (1 << I)) { + Range += std::to_string(I) + " "; } } ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrOperandRange, - {"StreamID", range, std::to_string(immStreamID)}); + {"StreamID", Range, std::to_string(ImmStreamId)}); } } @@ -893,25 +893,25 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::EmitIndices: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"EmitIndices", "Mesh shader"}); } } break; case DXIL::OpCode::SetMeshOutputCounts: { - if (!props.IsMS()) { + if (!Props.IsMS()) { 
ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"SetMeshOutputCounts", "Mesh shader"}); } } break; case DXIL::OpCode::GetMeshPayload: { - if (!props.IsMS()) { + if (!Props.IsMS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"GetMeshPayload", "Mesh shader"}); } } break; case DXIL::OpCode::DispatchMesh: { - if (!props.IsAS()) { + if (!Props.IsAS()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, {"DispatchMesh", "Amplification shader"}); } @@ -925,9 +925,9 @@ static void ValidateSignatureDxilOp(CallInst *CI, DXIL::OpCode opcode, } } -static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { // Imm input value validation. case DXIL::OpCode::Asin: { DxilInst_Asin I(CI); @@ -973,77 +973,86 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode opcode, // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation -static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode opcode, - ConstantInt *mask, unsigned stValMask, - bool isTyped, ValidationContext &ValCtx) { - if (!mask) { +static bool ValidateStorageMasks(Instruction *I, DXIL::OpCode Opcode, + ConstantInt *Mask, unsigned StValMask, + bool IsTyped, ValidationContext &ValCtx) { + if (!Mask) { // Mask for buffer store should be immediate. 
ValCtx.EmitInstrFormatError(I, ValidationRule::InstrOpConst, - {"Mask", hlsl::OP::GetOpCodeName(opcode)}); + {"Mask", hlsl::OP::GetOpCodeName(Opcode)}); return false; } - unsigned uMask = mask->getLimitedValue(); - if (isTyped && uMask != 0xf) { + unsigned UMask = Mask->getLimitedValue(); + if (IsTyped && UMask != 0xf) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskForTypedUAVStore); } // write mask must be contiguous (.x .xy .xyz or .xyzw) - if (!((uMask == 0xf) || (uMask == 0x7) || (uMask == 0x3) || (uMask == 0x1))) { + if (!((UMask == 0xf) || (UMask == 0x7) || (UMask == 0x3) || (UMask == 0x1))) { ValCtx.EmitInstrError(I, ValidationRule::InstrWriteMaskGapForUAV); } - // If a bit is set in the uMask (expected values) that isn't set in stValMask + // If a bit is set in the UMask (expected values) that isn't set in StValMask // (user provided values) then the user failed to define some of the output // values. - if (uMask & ~stValMask) + if (UMask & ~StValMask) ValCtx.EmitInstrError(I, ValidationRule::InstrUndefinedValueForUAVStore); - else if (uMask != stValMask) + else if (UMask != StValMask) ValCtx.EmitInstrFormatError( I, ValidationRule::InstrWriteMaskMatchValueForUAVStore, - {std::to_string(uMask), std::to_string(stValMask)}); + {std::to_string(UMask), std::to_string(StValMask)}); return true; } -static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, +static void ValidateASHandle(CallInst *CI, Value *Hdl, + ValidationContext &ValCtx) { + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Hdl); + if (RP.getResourceClass() == DXIL::ResourceClass::Invalid || + RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { + ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); + } +} + +static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValidationContext &ValCtx) { - switch (opcode) { + switch (Opcode) { case DXIL::OpCode::GetDimensions: { - DxilInst_GetDimensions getDim(CI); - Value *handle 
= getDim.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); + DxilInst_GetDimensions GetDim(CI); + Value *Handle = GetDim.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); // Check the result component use. - ResRetUsage usage; - CollectGetDimResRetUsage(usage, CI, ValCtx); + ResRetUsage Usage; + CollectGetDimResRetUsage(Usage, CI, ValCtx); // Mip level only for texture. - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: - if (usage.y) { + if (Usage.Y) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"y", "Texture1D"}); } - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1D"}); } break; case DXIL::ResourceKind::Texture1DArray: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture1DArray"}); } break; case DXIL::ResourceKind::Texture2D: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2D"}); @@ -1052,7 +1061,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture2DArray: break; case DXIL::ResourceKind::Texture2DMS: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "Texture2DMS"}); @@ -1063,7 +1072,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::Texture3D: break; case DXIL::ResourceKind::TextureCube: - if (usage.z) { + if (Usage.Z) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"z", "TextureCube"}); @@ -1075,12 +1084,12 @@ static 
void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: { - Value *mip = getDim.get_mipLevel(); - if (!isa(mip)) { + Value *Mip = GetDim.get_mipLevel(); + if (!isa(Mip)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrMipLevelForGetDimension); } - if (resKind != DXIL::ResourceKind::Invalid) { - if (usage.y || usage.z || usage.w) { + if (ResKind != DXIL::ResourceKind::Invalid) { + if (Usage.Y || Usage.Z || Usage.W) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); @@ -1092,38 +1101,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; } - if (usage.status) { + if (Usage.Status) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrUndefResultForGetDimension, {"invalid", "resource"}); } } break; case DXIL::OpCode::CalculateLOD: { - DxilInst_CalculateLOD lod(CI); - Value *samplerHandle = lod.get_sampler(); - DXIL::SamplerKind samplerKind = GetSamplerKind(samplerHandle, ValCtx); - if (samplerKind != DXIL::SamplerKind::Default) { + DxilInst_CalculateLOD LOD(CI); + Value *SamplerHandle = LOD.get_sampler(); + DXIL::SamplerKind SamplerKind = GetSamplerKind(SamplerHandle, ValCtx); + if (SamplerKind != DXIL::SamplerKind::Default) { // After SM68, Comparison is supported. 
if (!ValCtx.DxilMod.GetShaderModel()->IsSM68Plus() || - samplerKind != DXIL::SamplerKind::Comparison) + SamplerKind != DXIL::SamplerKind::Comparison) ValCtx.EmitInstrError(CI, ValidationRule::InstrSamplerModeForLOD); } - Value *handle = lod.get_handle(); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(handle, compTy, resClass, ValCtx); - if (resClass != DXIL::ResourceClass::SRV) { + Value *Handle = LOD.get_handle(); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather); return; } // Coord match resource. ValidateCalcLODResourceDimensionCoord( - CI, resKind, {lod.get_coord0(), lod.get_coord1(), lod.get_coord2()}, + CI, ResKind, {LOD.get_coord0(), LOD.get_coord1(), LOD.get_coord2()}, ValCtx); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1140,67 +1149,67 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::TextureGather: { - DxilInst_TextureGather gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGather Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::TextureGatherCmp: { - DxilInst_TextureGatherCmp gather(CI); - ValidateGather(CI, gather.get_srv(), gather.get_sampler(), - {gather.get_coord0(), 
gather.get_coord1(), - gather.get_coord2(), gather.get_coord3()}, - {gather.get_offset0(), gather.get_offset1()}, + DxilInst_TextureGatherCmp Gather(CI); + ValidateGather(CI, Gather.get_srv(), Gather.get_sampler(), + {Gather.get_coord0(), Gather.get_coord1(), + Gather.get_coord2(), Gather.get_coord3()}, + {Gather.get_offset0(), Gather.get_offset1()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::Sample: { - DxilInst_Sample sample(CI); + DxilInst_Sample Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmp: { - DxilInst_SampleCmp sample(CI); + DxilInst_SampleCmp Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpLevel: { // sampler must be comparison mode. 
- DxilInst_SampleCmpLevel sample(CI); + DxilInst_SampleCmpLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleCmpLevelZero: { // sampler must be comparison mode. - DxilInst_SampleCmpLevelZero sample(CI); + DxilInst_SampleCmpLevelZero Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleBias: { - DxilInst_SampleBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1210,19 +1219,19 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), 
sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleCmpBias: { - DxilInst_SampleCmpBias sample(CI); - Value *bias = sample.get_bias(); - if (ConstantFP *cBias = dyn_cast(bias)) { - float fBias = cBias->getValueAPF().convertToFloat(); - if (fBias < DXIL::kMinMipLodBias || fBias > DXIL::kMaxMipLodBias) { + DxilInst_SampleCmpBias Sample(CI); + Value *Bias = Sample.get_bias(); + if (ConstantFP *cBias = dyn_cast(Bias)) { + float FBias = cBias->getValueAPF().convertToFloat(); + if (FBias < DXIL::kMinMipLodBias || FBias > DXIL::kMaxMipLodBias) { ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrImmBiasForSampleB, {std::to_string(DXIL::kMinMipLodBias), @@ -1232,38 +1241,38 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); ValidateDerivativeOp(CI, ValCtx); } break; case DXIL::OpCode::SampleGrad: { - DxilInst_SampleGrad sample(CI); + DxilInst_SampleGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), 
Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::SampleCmpGrad: { - DxilInst_SampleCmpGrad sample(CI); + DxilInst_SampleCmpGrad Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ true, ValCtx); } break; case DXIL::OpCode::SampleLevel: { - DxilInst_SampleLevel sample(CI); + DxilInst_SampleLevel Sample(CI); ValidateSampleInst( - CI, sample.get_srv(), sample.get_sampler(), - {sample.get_coord0(), sample.get_coord1(), sample.get_coord2(), - sample.get_coord3()}, - {sample.get_offset0(), sample.get_offset1(), sample.get_offset2()}, + CI, Sample.get_srv(), Sample.get_sampler(), + {Sample.get_coord0(), Sample.get_coord1(), Sample.get_coord2(), + Sample.get_coord3()}, + {Sample.get_offset0(), Sample.get_offset1(), Sample.get_offset2()}, /*IsSampleC*/ false, ValCtx); } break; case DXIL::OpCode::CheckAccessFullyMapped: { @@ -1273,53 +1282,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); - bool isLegal = EVI->getNumIndices() == 1 && + bool IsLegal = EVI->getNumIndices() == 1 && EVI->getIndices()[0] == DXIL::kResRetStatusIndex && ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); - if (!isLegal) { + if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } } } break; case DXIL::OpCode::BufferStore: { - 
DxilInst_BufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); + DxilInst_BufferStore BufSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufSt.get_uav(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = - StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), - bufSt.get_value2(), bufSt.get_value3()}); + ConstantInt *Mask = dyn_cast(BufSt.get_mask()); + unsigned StValMask = + StoreValueToMask({BufSt.get_value0(), BufSt.get_value1(), + BufSt.get_value2(), BufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, - resKind == DXIL::ResourceKind::TypedBuffer || - resKind == DXIL::ResourceKind::TBuffer, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, + ResKind == DXIL::ResourceKind::TypedBuffer || + ResKind == DXIL::ResourceKind::TBuffer, ValCtx)) return; - Value *offset = bufSt.get_coord1(); + Value *Offset = BufSt.get_coord1(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1332,26 +1341,26 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case 
DXIL::OpCode::TextureStore: { - DxilInst_TextureStore texSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texSt.get_srv(), compTy, resClass, ValCtx); + DxilInst_TextureStore TexSt(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexSt.get_srv(), CompTy, ResClass, ValCtx); - if (resClass != DXIL::ResourceClass::UAV) { + if (ResClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); } - ConstantInt *mask = dyn_cast(texSt.get_mask()); - unsigned stValMask = - StoreValueToMask({texSt.get_value0(), texSt.get_value1(), - texSt.get_value2(), texSt.get_value3()}); + ConstantInt *Mask = dyn_cast(TexSt.get_mask()); + unsigned StValMask = + StoreValueToMask({TexSt.get_value0(), TexSt.get_value1(), + TexSt.get_value2(), TexSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, true /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, true /*IsTyped*/, ValCtx)) return; - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1367,30 +1376,30 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::BufferLoad: { - DxilInst_BufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { + DxilInst_BufferLoad BufLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(BufLd.get_srv(), CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) { 
ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } - Value *offset = bufLd.get_wot(); + Value *Offset = BufLd.get_wot(); - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1403,33 +1412,33 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } break; case DXIL::OpCode::TextureLoad: { - DxilInst_TextureLoad texLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(texLd.get_srv(), compTy, resClass, ValCtx); - - Value *mipLevel = texLd.get_mipLevelOrSampleCount(); - - if (resClass == DXIL::ResourceClass::UAV) { - bool noOffset = isa(texLd.get_offset0()); - noOffset &= isa(texLd.get_offset1()); - noOffset &= isa(texLd.get_offset2()); - if (!noOffset) { + DxilInst_TextureLoad TexLd(CI); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(TexLd.get_srv(), CompTy, ResClass, ValCtx); + + Value *MipLevel = TexLd.get_mipLevelOrSampleCount(); + + if (ResClass == DXIL::ResourceClass::UAV) { + bool NoOffset = isa(TexLd.get_offset0()); + NoOffset &= isa(TexLd.get_offset1()); + NoOffset &= isa(TexLd.get_offset2()); + if (!NoOffset) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOffsetOnUAVLoad); } - if (!isa(mipLevel)) { - if (resKind != DXIL::ResourceKind::Texture2DMS && - resKind != DXIL::ResourceKind::Texture2DMSArray) + if (!isa(MipLevel)) { + if (ResKind != DXIL::ResourceKind::Texture2DMS && + ResKind != DXIL::ResourceKind::Texture2DMSArray) ValCtx.EmitInstrError(CI, 
ValidationRule::InstrMipOnUAVLoad); } } else { - if (resClass != DXIL::ResourceClass::SRV) { + if (ResClass != DXIL::ResourceClass::SRV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); } } - switch (resKind) { + switch (ResKind) { case DXIL::ResourceKind::Texture1D: case DXIL::ResourceKind::Texture1DArray: case DXIL::ResourceKind::Texture2D: @@ -1438,7 +1447,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, break; case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: { - if (isa(mipLevel)) { + if (isa(MipLevel)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrSampleIndexForLoad2DMS); } } break; @@ -1449,69 +1458,70 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } ValidateResourceOffset( - CI, resKind, - {texLd.get_offset0(), texLd.get_offset1(), texLd.get_offset2()}, + CI, ResKind, + {TexLd.get_offset0(), TexLd.get_offset1(), TexLd.get_offset2()}, ValCtx); } break; case DXIL::OpCode::CBufferLoad: { DxilInst_CBufferLoad CBLoad(CI); - Value *regIndex = CBLoad.get_byteOffset(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue(); - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_byteOffset(); + if (ConstantInt *cIndex = dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue(); + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; case DXIL::OpCode::CBufferLoadLegacy: { DxilInst_CBufferLoadLegacy CBLoad(CI); - Value *regIndex = CBLoad.get_regIndex(); - if (ConstantInt *cIndex = dyn_cast(regIndex)) { - int offset = cIndex->getLimitedValue() * 16; // 16 bytes align - int size = GetCBufSize(CBLoad.get_handle(), ValCtx); - if (size > 0 && offset >= size) { + Value *RegIndex = CBLoad.get_regIndex(); + if (ConstantInt *cIndex = 
dyn_cast(RegIndex)) { + int Offset = cIndex->getLimitedValue() * 16; // 16 bytes align + int Size = GetCBufSize(CBLoad.get_handle(), ValCtx); + if (Size > 0 && Offset >= Size) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCBufferOutOfBound); } } } break; - case DXIL::OpCode::RawBufferLoad: { + case DXIL::OpCode::RawBufferLoad: if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferLoad, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } - DxilInst_RawBufferLoad bufLd(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufLd.get_srv(), compTy, resClass, ValCtx); + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorLoad: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::SRV && + ResClass != DXIL::ResourceClass::UAV) - if (resClass != DXIL::ResourceClass::SRV && - resClass != DXIL::ResourceClass::UAV) { ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForLoad); - } - Value *offset = bufLd.get_elementOffset(); - Value *align = bufLd.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - alignSize = bufLd.get_alignment_val(); - } - switch (resKind) { + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferLoadAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorLoad == Opcode) + AlignIdx = DXIL::OperandIndex::kRawBufferVectorLoadAlignmentOpIdx; + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + 
Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx); + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1526,47 +1536,53 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, if (!ValCtx.DxilMod.GetShaderModel()->IsSM63Plus()) { Type *Ty = OP::GetOverloadType(DXIL::OpCode::RawBufferStore, CI->getCalledFunction()); - if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) { + if (ValCtx.DL.getTypeAllocSizeInBits(Ty) > 32) ValCtx.EmitInstrError(CI, ValidationRule::Sm64bitRawBufferLoadStore); - } } DxilInst_RawBufferStore bufSt(CI); - DXIL::ComponentType compTy; - DXIL::ResourceClass resClass; - DXIL::ResourceKind resKind = - GetResourceKindAndCompTy(bufSt.get_uav(), compTy, resClass, ValCtx); - - if (resClass != DXIL::ResourceClass::UAV) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForUAVStore); - } - - ConstantInt *mask = dyn_cast(bufSt.get_mask()); - unsigned stValMask = + ConstantInt *Mask = dyn_cast(bufSt.get_mask()); + unsigned StValMask = StoreValueToMask({bufSt.get_value0(), bufSt.get_value1(), bufSt.get_value2(), bufSt.get_value3()}); - if (!ValidateStorageMasks(CI, opcode, mask, stValMask, false /*isTyped*/, + if (!ValidateStorageMasks(CI, Opcode, Mask, StValMask, false /*IsTyped*/, ValCtx)) return; + } + LLVM_FALLTHROUGH; + case DXIL::OpCode::RawBufferVectorStore: { + Value *Handle = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreHandleOpIdx); + DXIL::ComponentType CompTy; + DXIL::ResourceClass ResClass; + DXIL::ResourceKind ResKind = + GetResourceKindAndCompTy(Handle, CompTy, ResClass, ValCtx); + + if (ResClass != DXIL::ResourceClass::UAV) + ValCtx.EmitInstrError(CI, 
ValidationRule::InstrResourceClassForUAVStore); - Value *offset = bufSt.get_elementOffset(); - Value *align = bufSt.get_alignment(); - unsigned alignSize = 0; - if (!isa(align)) { - ValCtx.EmitInstrError(CI, - ValidationRule::InstrCoordinateCountForRawTypedBuf); - } else { - alignSize = bufSt.get_alignment_val(); + unsigned AlignIdx = DXIL::OperandIndex::kRawBufferStoreAlignmentOpIdx; + if (DXIL::OpCode::RawBufferVectorStore == Opcode) { + AlignIdx = DXIL::OperandIndex::kRawBufferVectorStoreAlignmentOpIdx; + unsigned ValueIx = DXIL::OperandIndex::kRawBufferVectorStoreValOpIdx; + if (isa(CI->getOperand(ValueIx))) + ValCtx.EmitInstrError(CI, + ValidationRule::InstrUndefinedValueForUAVStore); } - switch (resKind) { + if (!isa(CI->getOperand(AlignIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrConstAlignForRawBuf); + + Value *Offset = + CI->getOperand(DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx); + switch (ResKind) { case DXIL::ResourceKind::RawBuffer: - if (!isa(offset)) { + if (!isa(Offset)) { ValCtx.EmitInstrError( CI, ValidationRule::InstrCoordinateCountForRawTypedBuf); } break; case DXIL::ResourceKind::StructuredBuffer: - if (isa(offset)) { + if (isa(Offset)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCoordinateCountForStructBuf); } @@ -1578,16 +1594,14 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } break; case DXIL::OpCode::TraceRay: { - DxilInst_TraceRay traceRay(CI); - Value *hdl = traceRay.get_AccelerationStructure(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(hdl); - if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - return; - } - if (RP.getResourceKind() != DXIL::ResourceKind::RTAccelerationStructure) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForTraceRay); - } + DxilInst_TraceRay TraceRay(CI); + Value *Hdl = TraceRay.get_AccelerationStructure(); + ValidateASHandle(CI, Hdl, ValCtx); + } 
break; + case DXIL::OpCode::HitObject_TraceRay: { + DxilInst_HitObject_TraceRay HOTraceRay(CI); + Value *Hdl = HOTraceRay.get_accelerationStructure(); + ValidateASHandle(CI, Hdl, ValCtx); } break; default: break; @@ -1595,12 +1609,12 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } static void ValidateBarrierFlagArg(ValidationContext &ValCtx, CallInst *CI, - Value *Arg, unsigned validMask, - StringRef flagName, StringRef opName) { + Value *Arg, unsigned ValidMask, + StringRef FlagName, StringRef OpName) { if (ConstantInt *CArg = dyn_cast(Arg)) { - if ((CArg->getLimitedValue() & (uint32_t)(~validMask)) != 0) { + if ((CArg->getLimitedValue() & (uint32_t)(~ValidMask)) != 0) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrBarrierFlagInvalid, - {flagName, opName}); + {FlagName, OpName}); } } else { ValCtx.EmitInstrError(CI, @@ -1621,36 +1635,45 @@ std::string GetLaunchTypeStr(DXIL::NodeLaunchType LT) { } } +static unsigned getSemanticFlagValidMask(const ShaderModel *pSM) { + unsigned DxilMajor, DxilMinor; + pSM->GetDxilVersion(DxilMajor, DxilMinor); + // DXIL version >= 1.9 + if (hlsl::DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 9) < 0) + return static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + return static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); +} + static void ValidateDxilOperationCallInProfile(CallInst *CI, - DXIL::OpCode opcode, + DXIL::OpCode Opcode, const ShaderModel *pSM, ValidationContext &ValCtx) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = pSM ? 
pSM->GetKind() : DXIL::ShaderKind::Invalid; llvm::Function *F = CI->getParent()->getParent(); - DXIL::NodeLaunchType nodeLaunchType = DXIL::NodeLaunchType::Invalid; - if (DXIL::ShaderKind::Library == shaderKind) { + DXIL::NodeLaunchType NodeLaunchType = DXIL::NodeLaunchType::Invalid; + if (DXIL::ShaderKind::Library == ShaderKind) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DxilEntryProps &entryProps = ValCtx.DxilMod.GetDxilEntryProps(F); - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind == DXIL::ShaderKind::Node) - nodeLaunchType = entryProps.props.Node.LaunchType; + DxilEntryProps &EntryProps = ValCtx.DxilMod.GetDxilEntryProps(F); + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; + if (ShaderKind == DXIL::ShaderKind::Node) + NodeLaunchType = EntryProps.props.Node.LaunchType; } else if (ValCtx.DxilMod.IsPatchConstantShader(F)) - shaderKind = DXIL::ShaderKind::Hull; + ShaderKind = DXIL::ShaderKind::Hull; } // These shader models are treted like compute - bool isCSLike = shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Node; + bool IsCSLike = ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Node; // Is called from a library function - bool isLibFunc = shaderKind == DXIL::ShaderKind::Library; + bool IsLibFunc = ShaderKind == DXIL::ShaderKind::Library; - ValidateHandleArgs(CI, opcode, ValCtx); + ValidateHandleArgs(CI, Opcode, ValCtx); - switch (opcode) { + switch (Opcode) { // Imm input value validation. 
case DXIL::OpCode::Asin: case DXIL::OpCode::Acos: @@ -1659,7 +1682,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::DerivFineY: case DXIL::OpCode::DerivCoarseX: case DXIL::OpCode::DerivCoarseY: - ValidateImmOperandForMathDxilOp(CI, opcode, ValCtx); + ValidateImmOperandForMathDxilOp(CI, Opcode, ValCtx); break; // Resource validation. case DXIL::OpCode::GetDimensions: @@ -1684,7 +1707,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::CBufferLoadLegacy: case DXIL::OpCode::RawBufferLoad: case DXIL::OpCode::RawBufferStore: - ValidateResourceDxilOp(CI, opcode, ValCtx); + case DXIL::OpCode::RawBufferVectorLoad: + case DXIL::OpCode::RawBufferVectorStore: + ValidateResourceDxilOp(CI, Opcode, ValCtx); break; // Input output. case DXIL::OpCode::LoadInput: @@ -1705,13 +1730,13 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, case DXIL::OpCode::EmitStream: case DXIL::OpCode::EmitThenCutStream: case DXIL::OpCode::CutStream: - ValidateSignatureDxilOp(CI, opcode, ValCtx); + ValidateSignatureDxilOp(CI, Opcode, ValCtx); break; // Special. 
case DXIL::OpCode::AllocateRayQuery: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); - if (!llvm::isa(constRayFlag)) { + llvm::Value *ConstRayFlag = CI->getOperand(1); + if (!llvm::isa(ConstRayFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQueryFlagsAreConst); } @@ -1719,9 +1744,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::AllocateRayQuery2: { // validate flags are immediate and compatible - llvm::Value *constRayFlag = CI->getOperand(1); + llvm::Value *ConstRayFlag = CI->getOperand(1); llvm::Value *RayQueryFlag = CI->getOperand(2); - if (!llvm::isa(constRayFlag) || + if (!llvm::isa(ConstRayFlag) || !llvm::isa(RayQueryFlag)) { ValCtx.EmitInstrError(CI, ValidationRule::DeclAllocateRayQuery2FlagsAreConst); @@ -1730,7 +1755,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, // When the ForceOMM2State ConstRayFlag is given as an argument to // a RayQuery object, AllowOpacityMicromaps is expected // as a RayQueryFlag argument - llvm::ConstantInt *Arg1 = llvm::cast(constRayFlag); + llvm::ConstantInt *Arg1 = llvm::cast(ConstRayFlag); llvm::ConstantInt *Arg2 = llvm::cast(RayQueryFlag); if ((Arg1->getValue().getSExtValue() & (unsigned)DXIL::RayFlag::ForceOMM2State) && @@ -1744,9 +1769,9 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } case DXIL::OpCode::BufferUpdateCounter: { - DxilInst_BufferUpdateCounter updateCounter(CI); - Value *handle = updateCounter.get_uav(); - DxilResourceProperties RP = ValCtx.GetResourceFromVal(handle); + DxilInst_BufferUpdateCounter UpdateCounter(CI); + Value *Handle = UpdateCounter.get_uav(); + DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); if (!RP.isUAV()) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBufferUpdateCounterOnUAV); @@ -1761,20 +1786,20 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, CI, ValidationRule::InstrBufferUpdateCounterOnResHasCounter); } - Value 
*inc = updateCounter.get_inc(); - if (ConstantInt *cInc = dyn_cast(inc)) { - bool isInc = cInc->getLimitedValue() == 1; + Value *Inc = UpdateCounter.get_inc(); + if (ConstantInt *cInc = dyn_cast(Inc)) { + bool IsInc = cInc->getLimitedValue() == 1; if (!ValCtx.isLibProfile) { - auto it = ValCtx.HandleResIndexMap.find(handle); - if (it != ValCtx.HandleResIndexMap.end()) { - unsigned resIndex = it->second; - if (ValCtx.UavCounterIncMap.count(resIndex)) { - if (isInc != ValCtx.UavCounterIncMap[resIndex]) { + auto It = ValCtx.HandleResIndexMap.find(Handle); + if (It != ValCtx.HandleResIndexMap.end()) { + unsigned ResIndex = It->second; + if (ValCtx.UavCounterIncMap.count(ResIndex)) { + if (IsInc != ValCtx.UavCounterIncMap[ResIndex]) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOnlyOneAllocConsume); } } else { - ValCtx.UavCounterIncMap[resIndex] = isInc; + ValCtx.UavCounterIncMap[ResIndex] = IsInc; } } @@ -1789,35 +1814,35 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, } break; case DXIL::OpCode::Barrier: { - DxilInst_Barrier barrier(CI); - Value *mode = barrier.get_barrierMode(); - ConstantInt *cMode = dyn_cast(mode); - if (!cMode) { + DxilInst_Barrier Barrier(CI); + Value *Mode = Barrier.get_barrierMode(); + ConstantInt *CMode = dyn_cast(Mode); + if (!CMode) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, {"Mode", "Barrier"}); return; } - const unsigned uglobal = + const unsigned Uglobal = static_cast(DXIL::BarrierMode::UAVFenceGlobal); - const unsigned g = static_cast(DXIL::BarrierMode::TGSMFence); - const unsigned ut = + const unsigned G = static_cast(DXIL::BarrierMode::TGSMFence); + const unsigned Ut = static_cast(DXIL::BarrierMode::UAVFenceThreadGroup); - unsigned barrierMode = cMode->getLimitedValue(); + unsigned BarrierMode = CMode->getLimitedValue(); - if (isCSLike || isLibFunc) { - bool bHasUGlobal = barrierMode & uglobal; - bool bHasGroup = barrierMode & g; - bool bHasUGroup = barrierMode & ut; - if (bHasUGlobal && 
bHasUGroup) { + if (IsCSLike || IsLibFunc) { + bool HasUGlobal = BarrierMode & Uglobal; + bool HasGroup = BarrierMode & G; + bool HasUGroup = BarrierMode & Ut; + if (HasUGlobal && HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeUselessUGroup); } - if (!bHasUGlobal && !bHasGroup && !bHasUGroup) { + if (!HasUGlobal && !HasGroup && !HasUGroup) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); } } else { - if (uglobal != barrierMode) { + if (Uglobal != BarrierMode) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } @@ -1829,30 +1854,29 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, (unsigned)hlsl::DXIL::MemoryTypeFlag::ValidMask, "memory type", "BarrierByMemoryType"); ValidateBarrierFlagArg(ValCtx, CI, DI.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", "BarrierByMemoryType"); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + getSemanticFlagValidMask(pSM), "semantic", + "BarrierByMemoryType"); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; case DXIL::OpCode::BarrierByNodeRecordHandle: case DXIL::OpCode::BarrierByMemoryHandle: { - std::string opName = opcode == DXIL::OpCode::BarrierByNodeRecordHandle + std::string OpName = Opcode == DXIL::OpCode::BarrierByNodeRecordHandle ? 
"barrierByNodeRecordHandle" : "barrierByMemoryHandle"; DxilInst_BarrierByMemoryHandle DIMH(CI); ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SemanticFlags(), - (unsigned)hlsl::DXIL::BarrierSemanticFlag::ValidMask, - "semantic", opName); - if (!isLibFunc && shaderKind != DXIL::ShaderKind::Node && + getSemanticFlagValidMask(pSM), "semantic", OpName); + if (!IsLibFunc && ShaderKind != DXIL::ShaderKind::Node && OP::BarrierRequiresNode(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierRequiresNode); } - if (!isCSLike && !isLibFunc && OP::BarrierRequiresGroup(CI)) { + if (!IsCSLike && !IsLibFunc && OP::BarrierRequiresGroup(CI)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } break; @@ -1862,9 +1886,33 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, {"CreateHandleForLib", "Library"}); } break; + + // Shader Execution Reordering + case DXIL::OpCode::MaybeReorderThread: { + Value *HitObject = CI->getArgOperand(1); + Value *CoherenceHintBits = CI->getArgOperand(2); + Value *NumCoherenceHintBits = CI->getArgOperand(3); + + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + + if (isa(NumCoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + + ConstantInt *NumCoherenceHintBitsConst = + dyn_cast(NumCoherenceHintBits); + const bool HasCoherenceHint = + NumCoherenceHintBitsConst && + NumCoherenceHintBitsConst->getLimitedValue() != 0; + if (HasCoherenceHint && isa(CoherenceHintBits)) + ValCtx.EmitInstrError( + CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); + } break; + case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { - Type *pOverloadType = OP::GetOverloadType(opcode, CI->getCalledFunction()); + Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); if ((pOverloadType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( CI, 
ValidationRule::SmOpcodeInInvalidFunction, @@ -1890,73 +1938,73 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; case DXIL::OpCode::ThreadId: // SV_DispatchThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(nodeLaunchType)}); + {"ThreadId", "SV_DispatchThreadID", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::GroupId: // SV_GroupId - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting) break; ValCtx.EmitInstrFormatError( CI, ValidationRule::InstrSVConflictingLaunchMode, - {"GroupId", "SV_GroupId", GetLaunchTypeStr(nodeLaunchType)}); + {"GroupId", "SV_GroupId", GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::ThreadIdInGroup: // SV_GroupThreadID - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"ThreadIdInGroup", "SV_GroupThreadID", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; case DXIL::OpCode::FlattenedThreadIdInGroup: // SV_GroupIndex - if (shaderKind != DXIL::ShaderKind::Node) { + if (ShaderKind != DXIL::ShaderKind::Node) { break; } - if (nodeLaunchType == DXIL::NodeLaunchType::Broadcasting || - nodeLaunchType == 
DXIL::NodeLaunchType::Coalescing) + if (NodeLaunchType == DXIL::NodeLaunchType::Broadcasting || + NodeLaunchType == DXIL::NodeLaunchType::Coalescing) break; ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrSVConflictingLaunchMode, {"FlattenedThreadIdInGroup", "SV_GroupIndex", - GetLaunchTypeStr(nodeLaunchType)}); + GetLaunchTypeStr(NodeLaunchType)}); break; default: - // TODO: make sure every opcode is checked. + // TODO: make sure every Opcode is checked. // Skip opcodes don't need special check. break; } } static bool IsDxilFunction(llvm::Function *F) { - unsigned argSize = F->arg_size(); - if (argSize < 1) { + unsigned ArgSize = F->arg_size(); + if (ArgSize < 1) { // Cannot be a DXIL operation. return false; } @@ -1991,9 +2039,9 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - OP *hlslOP = ValCtx.DxilMod.GetOP(); - bool isDxilOp = OP::IsDxilOpFunc(F); - Type *voidTy = Type::getVoidTy(F->getContext()); + OP *HlslOP = ValCtx.DxilMod.GetOP(); + bool IsDxilOp = OP::IsDxilOpFunc(F); + Type *VoidTy = Type::getVoidTy(F->getContext()); for (User *user : F->users()) { CallInst *CI = dyn_cast(user); @@ -2004,32 +2052,32 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { } // Skip call to external user defined function - if (!isDxilOp) + if (!IsDxilOp) continue; - Value *argOpcode = CI->getArgOperand(0); - ConstantInt *constOpcode = dyn_cast(argOpcode); - if (!constOpcode) { - // opcode not immediate; function body will validate this error. + Value *ArgOpcode = CI->getArgOperand(0); + ConstantInt *ConstOpcode = dyn_cast(ArgOpcode); + if (!ConstOpcode) { + // Opcode not immediate; function body will validate this error. continue; } - unsigned opcode = constOpcode->getLimitedValue(); - if (opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { - // invalid opcode; function body will validate this error. 
+ unsigned Opcode = ConstOpcode->getLimitedValue(); + if (Opcode >= (unsigned)DXIL::OpCode::NumOpCodes) { + // invalid Opcode; function body will validate this error. continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; // In some cases, no overloads are provided (void is exclusive to others) - Function *dxilFunc; - if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) { - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, voidTy); + Function *DxilFunc; + if (HlslOP->IsOverloadLegal(DxilOpcode, VoidTy)) { + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, VoidTy); } else { - Type *Ty = OP::GetOverloadType(dxilOpcode, CI->getCalledFunction()); + Type *Ty = OP::GetOverloadType(DxilOpcode, CI->getCalledFunction()); try { - if (!hlslOP->IsOverloadLegal(dxilOpcode, Ty)) { + if (!HlslOP->IsOverloadLegal(DxilOpcode, Ty)) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } @@ -2037,89 +2085,92 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty->getScalarType()); + DxilFunc = HlslOP->GetOpFunc(DxilOpcode, Ty); } - if (!dxilFunc) { - // Cannot find dxilFunction based on opcode and type. + if (!DxilFunc) { + // Cannot find DxilFunction based on Opcode and type. 
ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - if (dxilFunc->getFunctionType() != F->getFunctionType()) { + if (DxilFunc->getFunctionType() != F->getFunctionType()) { ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrCallOload, - {dxilFunc->getName()}); + {DxilFunc->getName()}); continue; } unsigned major = pSM->GetMajor(); unsigned minor = pSM->GetMinor(); if (ValCtx.isLibProfile) { - Function *callingFunction = CI->getParent()->getParent(); + Function *CallingFunction = CI->getParent()->getParent(); DXIL::ShaderKind SK = DXIL::ShaderKind::Library; - if (ValCtx.DxilMod.HasDxilFunctionProps(callingFunction)) - SK = ValCtx.DxilMod.GetDxilFunctionProps(callingFunction).shaderKind; - else if (ValCtx.DxilMod.IsPatchConstantShader(callingFunction)) + if (ValCtx.DxilMod.HasDxilFunctionProps(CallingFunction)) + SK = ValCtx.DxilMod.GetDxilFunctionProps(CallingFunction).shaderKind; + else if (ValCtx.DxilMod.IsPatchConstantShader(CallingFunction)) SK = DXIL::ShaderKind::Hull; - if (!ValidateOpcodeInProfile(dxilOpcode, SK, major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, SK, major, minor)) { // Opcode not available in profile. // produces: "lib_6_3(ps)", or "lib_6_3(anyhit)" for shader types // Or: "lib_6_3(lib)" for library function - std::string shaderModel = pSM->GetName(); - shaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; + std::string ShaderModel = pSM->GetName(); + ShaderModel += std::string("(") + ShaderModel::GetKindName(SK) + ")"; ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), shaderModel}); + {HlslOP->GetOpCodeName(DxilOpcode), ShaderModel}); continue; } } else { - if (!ValidateOpcodeInProfile(dxilOpcode, pSM->GetKind(), major, minor)) { + if (!ValidateOpcodeInProfile(DxilOpcode, pSM->GetKind(), major, minor)) { // Opcode not available in profile. 
ValCtx.EmitInstrFormatError( CI, ValidationRule::SmOpcode, - {hlslOP->GetOpCodeName(dxilOpcode), pSM->GetName()}); + {HlslOP->GetOpCodeName(DxilOpcode), pSM->GetName()}); continue; } } // Check more detail. - ValidateDxilOperationCallInProfile(CI, dxilOpcode, pSM, ValCtx); + ValidateDxilOperationCallInProfile(CI, DxilOpcode, pSM, ValCtx); } } /////////////////////////////////////////////////////////////////////////////// // Instruction validation functions. // -static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { - if (ST == hlslOP->GetBinaryWithCarryType()) +static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *HlslOP) { + if (ST == HlslOP->GetBinaryWithCarryType()) return true; - if (ST == hlslOP->GetBinaryWithTwoOutputsType()) + if (ST == HlslOP->GetBinaryWithTwoOutputsType()) return true; - if (ST == hlslOP->GetFourI32Type()) + if (ST == HlslOP->GetFourI32Type()) return true; - if (ST == hlslOP->GetFourI16Type()) + if (ST == HlslOP->GetFourI16Type()) return true; - if (ST == hlslOP->GetDimensionsType()) + if (ST == HlslOP->GetDimensionsType()) return true; - if (ST == hlslOP->GetHandleType()) + if (ST == HlslOP->GetHandleType()) return true; - if (ST == hlslOP->GetSamplePosType()) + if (ST == HlslOP->GetSamplePosType()) return true; - if (ST == hlslOP->GetSplitDoubleType()) + if (ST == HlslOP->GetSplitDoubleType()) return true; unsigned EltNum = ST->getNumElements(); + Type *EltTy = ST->getElementType(0); switch (EltNum) { case 2: + // Check if it's a native vector resret. + if (EltTy->isVectorTy()) + return ST == HlslOP->GetResRetType(EltTy); + LLVM_FALLTHROUGH; case 4: - case 8: { // 2 for doubles, 8 for halfs. - Type *EltTy = ST->getElementType(0); - return ST == hlslOP->GetCBufferRetType(EltTy); - } break; - case 5: { - Type *EltTy = ST->getElementType(0); - return ST == hlslOP->GetResRetType(EltTy); - } break; + case 8: // 2 for doubles, 8 for halfs. 
+ return ST == HlslOP->GetCBufferRetType(EltTy); + break; + case 5: + return ST == HlslOP->GetResRetType(EltTy); + break; default: return false; } @@ -2129,11 +2180,11 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { // inner type (UDT struct member) may be: [N dim array of]( UDT struct | scalar // ) scalar type may be: ( float(16|32|64) | int(16|32|64) ) static bool ValidateType(Type *Ty, ValidationContext &ValCtx, - bool bInner = false) { + bool IsInner = false) { DXASSERT_NOMSG(Ty != nullptr); if (Ty->isPointerTy()) { Type *EltTy = Ty->getPointerElementType(); - if (bInner || EltTy->isPointerTy()) { + if (IsInner || EltTy->isPointerTy()) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoPtrToPtr); return false; } @@ -2141,7 +2192,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } if (Ty->isArrayTy()) { Type *EltTy = Ty->getArrayElementType(); - if (!bInner && isa(EltTy)) { + if (!IsInner && isa(EltTy)) { // Outermost array should be converted to single-dim, // but arrays inside struct are allowed to be multi-dim ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoMultiDim); @@ -2152,7 +2203,7 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, Ty = EltTy; } if (Ty->isStructTy()) { - bool result = true; + bool Result = true; StructType *ST = cast(Ty); StringRef Name = ST->getName(); @@ -2160,28 +2211,28 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, // Allow handle type. 
if (ValCtx.HandleTy == Ty) return true; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); - if (IsDxilBuiltinStructType(ST, hlslOP)) { + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); - result = false; + Result = false; } ValCtx.EmitTypeError(Ty, ValidationRule::DeclDxilNsReserved); - result = false; + Result = false; } for (auto e : ST->elements()) { - if (!ValidateType(e, ValCtx, /*bInner*/ true)) { - result = false; + if (!ValidateType(e, ValCtx, /*IsInner*/ true)) { + Result = false; } } - return result; + return Result; } if (Ty->isFloatTy() || Ty->isHalfTy() || Ty->isDoubleTy()) { return true; } if (Ty->isIntegerTy()) { - unsigned width = Ty->getIntegerBitWidth(); - if (width != 1 && width != 8 && width != 16 && width != 32 && width != 64) { + unsigned Width = Ty->getIntegerBitWidth(); + if (Width != 1 && Width != 8 && Width != 16 && Width != 32 && Width != 64) { ValCtx.EmitTypeError(Ty, ValidationRule::TypesIntWidth); return false; } @@ -2193,6 +2244,9 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, return true; if (Ty->isVectorTy()) { + if (Ty->getVectorNumElements() > 1 && + ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoVector); return false; } @@ -2201,13 +2255,13 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, } static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, - unsigned index, uint64_t *pValue) { - *pValue = 0; - if (pMD->getNumOperands() < index) { + unsigned Index, uint64_t *PValue) { + *PValue = 0; + if (pMD->getNumOperands() < Index) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - ConstantAsMetadata *C = dyn_cast(pMD->getOperand(index)); + ConstantAsMetadata *C = dyn_cast(pMD->getOperand(Index)); if (C == nullptr) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; @@ -2217,7 
+2271,7 @@ static bool GetNodeOperandAsInt(ValidationContext &ValCtx, MDNode *pMD, ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); return false; } - *pValue = CI->getValue().getZExtValue(); + *PValue = CI->getValue().getZExtValue(); return true; } @@ -2231,14 +2285,14 @@ static bool IsPrecise(Instruction &I, ValidationContext &ValCtx) { return false; } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { return false; } - if (val == 1) { + if (Val == 1) { return true; } - if (val != 0) { + if (Val != 0) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } return false; @@ -2257,12 +2311,12 @@ static bool IsValueMinPrec(DxilModule &DxilMod, Value *V) { } static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *setMeshOutputCounts, - CallInst *getMeshPayload) { + CallInst *SetMeshOutputCounts, + CallInst *GetMeshPayload) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Mesh) + if (ShaderKind != DXIL::ShaderKind::Mesh) return; } else { return; @@ -2271,10 +2325,10 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, DominatorTreeAnalysis DTA; DominatorTree DT = DTA.run(*F); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - bool foundSetMeshOutputCountsInCurrentBB = false; - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + bool FoundSetMeshOutputCountsInCurrentBb = false; + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; // Calls to external functions. 
CallInst *CI = dyn_cast(&I); @@ -2290,22 +2344,22 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, continue; } - if (CI == setMeshOutputCounts) { - foundSetMeshOutputCountsInCurrentBB = true; + if (CI == SetMeshOutputCounts) { + FoundSetMeshOutputCountsInCurrentBb = true; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); - unsigned opcode = OpcodeConst->getLimitedValue(); - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; - - if (dxilOpcode == DXIL::OpCode::StoreVertexOutput || - dxilOpcode == DXIL::OpCode::StorePrimitiveOutput || - dxilOpcode == DXIL::OpCode::EmitIndices) { - if (setMeshOutputCounts == nullptr) { + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); + unsigned Opcode = OpcodeConst->getLimitedValue(); + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; + + if (DxilOpcode == DXIL::OpCode::StoreVertexOutput || + DxilOpcode == DXIL::OpCode::StorePrimitiveOutput || + DxilOpcode == DXIL::OpCode::EmitIndices) { + if (SetMeshOutputCounts == nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMissingSetMeshOutputCounts); - } else if (!foundSetMeshOutputCountsInCurrentBB && - !DT.dominates(setMeshOutputCounts->getParent(), + } else if (!FoundSetMeshOutputCountsInCurrentBb && + !DT.dominates(SetMeshOutputCounts->getParent(), I.getParent())) { ValCtx.EmitInstrError( &I, ValidationRule::InstrNonDominatingSetMeshOutputCounts); @@ -2316,61 +2370,61 @@ static void ValidateMsIntrinsics(Function *F, ValidationContext &ValCtx, } } - if (getMeshPayload) { - PointerType *payloadPTy = cast(getMeshPayload->getType()); - StructType *payloadTy = - cast(payloadPTy->getPointerElementType()); + if (GetMeshPayload) { + PointerType *PayloadPTy = cast(GetMeshPayload->getType()); + StructType *PayloadTy = + cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned 
PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.MS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.MS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.MS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes)}); } - if (prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.MS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderPayloadSize, - {F->getName(), std::to_string(prop.ShaderProps.MS.payloadSizeInBytes), + {F->getName(), std::to_string(Prop.ShaderProps.MS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } } static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, - CallInst *dispatchMesh) { + CallInst *DispatchMesh) { if (ValCtx.DxilMod.HasDxilFunctionProps(F)) { - DXIL::ShaderKind shaderKind = + DXIL::ShaderKind ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(F).shaderKind; - if (shaderKind != DXIL::ShaderKind::Amplification) + if (ShaderKind != DXIL::ShaderKind::Amplification) return; - if (dispatchMesh) { - DxilInst_DispatchMesh dispatchMeshCall(dispatchMesh); - Value *operandVal = dispatchMeshCall.get_payload(); - Type *payloadTy = operandVal->getType(); + if (DispatchMesh) { + DxilInst_DispatchMesh DispatchMeshCall(DispatchMesh); + Value *OperandVal = DispatchMeshCall.get_payload(); + Type *PayloadTy = OperandVal->getType(); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - DxilFunctionProps &prop = 
ValCtx.DxilMod.GetDxilFunctionProps(F); + DxilFunctionProps &Prop = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (prop.ShaderProps.AS.payloadSizeInBytes < payloadSize) { + if (Prop.ShaderProps.AS.payloadSizeInBytes < PayloadSize) { ValCtx.EmitInstrFormatError( - dispatchMesh, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSizeDeclared, - {F->getName(), std::to_string(payloadSize), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes)}); + {F->getName(), std::to_string(PayloadSize), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes)}); } - if (prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { + if (Prop.ShaderProps.AS.payloadSizeInBytes > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, {F->getName(), - std::to_string(prop.ShaderProps.AS.payloadSizeInBytes), + std::to_string(Prop.ShaderProps.AS.payloadSizeInBytes), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } @@ -2379,7 +2433,7 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, return; } - if (dispatchMesh == nullptr) { + if (DispatchMesh == nullptr) { ValCtx.EmitFnError(F, ValidationRule::InstrNotOnceDispatchMesh); return; } @@ -2387,30 +2441,30 @@ static void ValidateAsIntrinsics(Function *F, ValidationContext &ValCtx, PostDominatorTree PDT; PDT.runOnFunction(*F); - if (!PDT.dominates(dispatchMesh->getParent(), &F->getEntryBlock())) { - ValCtx.EmitInstrError(dispatchMesh, + if (!PDT.dominates(DispatchMesh->getParent(), &F->getEntryBlock())) { + ValCtx.EmitInstrError(DispatchMesh, ValidationRule::InstrNonDominatingDispatchMesh); } - Function *dispatchMeshFunc = dispatchMesh->getCalledFunction(); - FunctionType *dispatchMeshFuncTy = dispatchMeshFunc->getFunctionType(); - PointerType *payloadPTy = - cast(dispatchMeshFuncTy->getParamType(4)); - StructType *payloadTy = 
cast(payloadPTy->getPointerElementType()); + Function *DispatchMeshFunc = DispatchMesh->getCalledFunction(); + FunctionType *DispatchMeshFuncTy = DispatchMeshFunc->getFunctionType(); + PointerType *PayloadPTy = + cast(DispatchMeshFuncTy->getParamType(4)); + StructType *PayloadTy = cast(PayloadPTy->getPointerElementType()); const DataLayout &DL = F->getParent()->getDataLayout(); - unsigned payloadSize = DL.getTypeAllocSize(payloadTy); + unsigned PayloadSize = DL.getTypeAllocSize(PayloadTy); - if (payloadSize > DXIL::kMaxMSASPayloadBytes) { + if (PayloadSize > DXIL::kMaxMSASPayloadBytes) { ValCtx.EmitInstrFormatError( - dispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, - {F->getName(), std::to_string(payloadSize), + DispatchMesh, ValidationRule::SmAmplificationShaderPayloadSize, + {F->getName(), std::to_string(PayloadSize), std::to_string(DXIL::kMaxMSASPayloadBytes)}); } } -static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { +static void ValidateControlFlowHint(BasicBlock &BB, ValidationContext &ValCtx) { // Validate controlflow hint. 
- TerminatorInst *TI = bb.getTerminator(); + TerminatorInst *TI = BB.getTerminator(); if (!TI) return; @@ -2421,33 +2475,33 @@ static void ValidateControlFlowHint(BasicBlock &bb, ValidationContext &ValCtx) { if (pNode->getNumOperands() < 3) return; - bool bHasBranch = false; - bool bHasFlatten = false; - bool bForceCase = false; + bool HasBranch = false; + bool HasFlatten = false; + bool ForceCase = false; - for (unsigned i = 2; i < pNode->getNumOperands(); i++) { - uint64_t value = 0; - if (GetNodeOperandAsInt(ValCtx, pNode, i, &value)) { - DXIL::ControlFlowHint hint = static_cast(value); - switch (hint) { + for (unsigned I = 2; I < pNode->getNumOperands(); I++) { + uint64_t Value = 0; + if (GetNodeOperandAsInt(ValCtx, pNode, I, &Value)) { + DXIL::ControlFlowHint Hint = static_cast(Value); + switch (Hint) { case DXIL::ControlFlowHint::Flatten: - bHasFlatten = true; + HasFlatten = true; break; case DXIL::ControlFlowHint::Branch: - bHasBranch = true; + HasBranch = true; break; case DXIL::ControlFlowHint::ForceCase: - bForceCase = true; + ForceCase = true; break; default: ValCtx.EmitMetaError(pNode, ValidationRule::MetaInvalidControlFlowHint); } } } - if (bHasBranch && bHasFlatten) { + if (HasBranch && HasFlatten) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaBranchFlatten); } - if (bForceCase && !isa(TI)) { + if (ForceCase && !isa(TI)) { ValCtx.EmitMetaError(pNode, ValidationRule::MetaForceCaseOnSwitch); } } @@ -2460,30 +2514,30 @@ static void ValidateTBAAMetadata(MDNode *Node, ValidationContext &ValCtx) { } } break; case 2: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if (!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } } break; case 3: { - MDNode *rootNode = dyn_cast(Node->getOperand(1)); - if (!rootNode) { + MDNode *RootNode = dyn_cast(Node->getOperand(1)); + if 
(!RootNode) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ValidateTBAAMetadata(rootNode, ValCtx); + ValidateTBAAMetadata(RootNode, ValCtx); } - ConstantAsMetadata *pointsToConstMem = + ConstantAsMetadata *PointsToConstMem = dyn_cast(Node->getOperand(2)); - if (!pointsToConstMem) { + if (!PointsToConstMem) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } else { - ConstantInt *isConst = - dyn_cast(pointsToConstMem->getValue()); - if (!isConst) { + ConstantInt *IsConst = + dyn_cast(PointsToConstMem->getValue()); + if (!IsConst) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); - } else if (isConst->getValue().getLimitedValue() > 1) { + } else if (IsConst->getValue().getLimitedValue() > 1) { ValCtx.EmitMetaError(Node, ValidationRule::MetaWellFormed); } } @@ -2564,11 +2618,11 @@ static void ValidateNonUniformMetadata(Instruction &I, MDNode *pMD, if (pMD->getNumOperands() != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - uint64_t val; - if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &val)) { + uint64_t Val; + if (!GetNodeOperandAsInt(ValCtx, pMD, 0, &Val)) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaWellFormed); } - if (val != 1) { + if (Val != 1) { ValCtx.EmitMetaError(pMD, ValidationRule::MetaValueRange); } } @@ -2603,31 +2657,31 @@ static void ValidateInstructionMetadata(Instruction *I, } static void ValidateFunctionAttribute(Function *F, ValidationContext &ValCtx) { - AttributeSet attrSet = F->getAttributes().getFnAttributes(); + AttributeSet AttrSet = F->getAttributes().getFnAttributes(); // fp32-denorm-mode - if (attrSet.hasAttribute(AttributeSet::FunctionIndex, + if (AttrSet.hasAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString)) { - Attribute attr = attrSet.getAttribute(AttributeSet::FunctionIndex, + Attribute Attr = AttrSet.getAttribute(AttributeSet::FunctionIndex, DXIL::kFP32DenormKindString); - StringRef value = attr.getValueAsString(); - if 
(!value.equals(DXIL::kFP32DenormValueAnyString) && - !value.equals(DXIL::kFP32DenormValueFtzString) && - !value.equals(DXIL::kFP32DenormValuePreserveString)) { - ValCtx.EmitFnAttributeError(F, attr.getKindAsString(), - attr.getValueAsString()); + StringRef StrValue = Attr.getValueAsString(); + if (!StrValue.equals(DXIL::kFP32DenormValueAnyString) && + !StrValue.equals(DXIL::kFP32DenormValueFtzString) && + !StrValue.equals(DXIL::kFP32DenormValuePreserveString)) { + ValCtx.EmitFnAttributeError(F, Attr.getKindAsString(), + Attr.getValueAsString()); } } // TODO: If validating libraries, we should remove all unknown function // attributes. For each attribute, check if it is a known attribute - for (unsigned I = 0, E = attrSet.getNumSlots(); I != E; ++I) { - for (auto AttrIter = attrSet.begin(I), AttrEnd = attrSet.end(I); + for (unsigned I = 0, E = AttrSet.getNumSlots(); I != E; ++I) { + for (auto AttrIter = AttrSet.begin(I), AttrEnd = AttrSet.end(I); AttrIter != AttrEnd; ++AttrIter) { if (!AttrIter->isStringAttribute()) { continue; } - StringRef kind = AttrIter->getKindAsString(); - if (!kind.equals(DXIL::kFP32DenormKindString) && - !kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { + StringRef Kind = AttrIter->getKindAsString(); + if (!Kind.equals(DXIL::kFP32DenormKindString) && + !Kind.equals(DXIL::kWaveOpsIncludeHelperLanesString)) { ValCtx.EmitFnAttributeError(F, AttrIter->getKindAsString(), AttrIter->getValueAsString()); } @@ -2669,21 +2723,38 @@ static bool IsLLVMInstructionAllowedForLib(Instruction &I, } } +// Shader model specific checks for valid LLVM instructions. +// Currently only checks for pre 6.9 usage of vector operations. +// Returns false if shader model is pre 6.9 and I represents a vector +// operation. Returns true otherwise. 
+static bool IsLLVMInstructionAllowedForShaderModel(Instruction &I, + ValidationContext &ValCtx) { + if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; + unsigned Opcode = I.getOpcode(); + if (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement || + Opcode == Instruction::ShuffleVector) + return false; + + return true; +} + static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { bool SupportsMinPrecision = ValCtx.DxilMod.GetGlobalFlags() & DXIL::kEnableMinPrecision; bool SupportsLifetimeIntrinsics = ValCtx.DxilMod.GetShaderModel()->IsSM66Plus(); - SmallVector gradientOps; - SmallVector barriers; - CallInst *setMeshOutputCounts = nullptr; - CallInst *getMeshPayload = nullptr; - CallInst *dispatchMesh = nullptr; - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + SmallVector GradientOps; + SmallVector Barriers; + CallInst *SetMeshOutputCounts = nullptr; + CallInst *GetMeshPayload = nullptr; + CallInst *DispatchMesh = nullptr; + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto b = F->begin(), bend = F->end(); b != bend; ++b) { - for (auto i = b->begin(), iend = b->end(); i != iend; ++i) { - llvm::Instruction &I = *i; + for (auto B = F->begin(), BEnd = F->end(); B != BEnd; ++B) { + for (auto It = B->begin(), ItEnd = B->end(); It != ItEnd; ++It) { + llvm::Instruction &I = *It; if (I.hasMetadata()) { @@ -2691,7 +2762,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } // Instructions must be allowed. 
- if (!IsLLVMInstructionAllowed(I)) { + if (!IsLLVMInstructionAllowed(I) || + !IsLLVMInstructionAllowedForShaderModel(I, ValCtx)) { if (!IsLLVMInstructionAllowedForLib(I, ValCtx)) { ValCtx.EmitInstrError(&I, ValidationRule::InstrAllowed); continue; @@ -2721,27 +2793,27 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } - Value *opcodeVal = CI->getOperand(0); - ConstantInt *OpcodeConst = dyn_cast(opcodeVal); + Value *OpcodeVal = CI->getOperand(0); + ConstantInt *OpcodeConst = dyn_cast(OpcodeVal); if (OpcodeConst == nullptr) { ValCtx.EmitInstrFormatError(&I, ValidationRule::InstrOpConst, {"Opcode", "DXIL operation"}); continue; } - unsigned opcode = OpcodeConst->getLimitedValue(); - if (opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { + unsigned Opcode = OpcodeConst->getLimitedValue(); + if (Opcode >= static_cast(DXIL::OpCode::NumOpCodes)) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpCode, {std::to_string((unsigned)DXIL::OpCode::NumOpCodes), - std::to_string(opcode)}); + std::to_string(Opcode)}); continue; } - DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode; + DXIL::OpCode DxilOpcode = (DXIL::OpCode)Opcode; bool IllegalOpFunc = true; - for (auto &it : hlslOP->GetOpFuncList(dxilOpcode)) { - if (it.second == FCalled) { + for (auto &It : HlslOP->GetOpFuncList(DxilOpcode)) { + if (It.second == FCalled) { IllegalOpFunc = false; break; } @@ -2750,46 +2822,46 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { if (IllegalOpFunc) { ValCtx.EmitInstrFormatError( &I, ValidationRule::InstrIllegalDXILOpFunction, - {FCalled->getName(), OP::GetOpCodeName(dxilOpcode)}); + {FCalled->getName(), OP::GetOpCodeName(DxilOpcode)}); continue; } - if (OP::IsDxilOpGradient(dxilOpcode)) { - gradientOps.push_back(CI); + if (OP::IsDxilOpGradient(DxilOpcode)) { + GradientOps.push_back(CI); } - if (dxilOpcode == DXIL::OpCode::Barrier) { - barriers.push_back(CI); + if (DxilOpcode == DXIL::OpCode::Barrier) { + 
Barriers.push_back(CI); } // External function validation will check the parameter // list. This function will check that the call does not // violate any rules. - if (dxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { + if (DxilOpcode == DXIL::OpCode::SetMeshOutputCounts) { // validate the call count of SetMeshOutputCounts - if (setMeshOutputCounts != nullptr) { + if (SetMeshOutputCounts != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleSetMeshOutputCounts); } - setMeshOutputCounts = CI; + SetMeshOutputCounts = CI; } - if (dxilOpcode == DXIL::OpCode::GetMeshPayload) { + if (DxilOpcode == DXIL::OpCode::GetMeshPayload) { // validate the call count of GetMeshPayload - if (getMeshPayload != nullptr) { + if (GetMeshPayload != nullptr) { ValCtx.EmitInstrError( &I, ValidationRule::InstrMultipleGetMeshPayload); } - getMeshPayload = CI; + GetMeshPayload = CI; } - if (dxilOpcode == DXIL::OpCode::DispatchMesh) { + if (DxilOpcode == DXIL::OpCode::DispatchMesh) { // validate the call count of DispatchMesh - if (dispatchMesh != nullptr) { + if (DispatchMesh != nullptr) { ValCtx.EmitInstrError(&I, ValidationRule::InstrNotOnceDispatchMesh); } - dispatchMesh = CI; + DispatchMesh = CI; } } continue; @@ -2797,23 +2869,23 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { for (Value *op : I.operands()) { if (isa(op)) { - bool legalUndef = isa(&I); + bool LegalUndef = isa(&I); if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } if (isa(&I)) { - legalUndef = op == I.getOperand(1); + LegalUndef = op == I.getOperand(1); } if (isa(&I)) { - legalUndef = op == I.getOperand(0); + LegalUndef = op == I.getOperand(0); } - if (!legalUndef) + if (!LegalUndef) ValCtx.EmitInstrError(&I, ValidationRule::InstrNoReadingUninitialized); } else if (ConstantExpr *CE = dyn_cast(op)) { - for (Value *opCE : CE->operands()) { - if (isa(opCE)) { + for (Value *OpCE : CE->operands()) { + if (isa(OpCE)) { 
ValCtx.EmitInstrError( &I, ValidationRule::InstrNoReadingUninitialized); } @@ -2843,8 +2915,8 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } - unsigned opcode = I.getOpcode(); - switch (opcode) { + unsigned Opcode = I.getOpcode(); + switch (Opcode) { case Instruction::Alloca: { AllocaInst *AI = cast(&I); // TODO: validate address space and alignment @@ -2885,26 +2957,26 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { continue; } GetElementPtrInst *GEP = cast(&I); - bool allImmIndex = true; + bool AllImmIndex = true; for (auto Idx = GEP->idx_begin(), E = GEP->idx_end(); Idx != E; Idx++) { if (!isa(Idx)) { - allImmIndex = false; + AllImmIndex = false; break; } } - if (allImmIndex) { + if (AllImmIndex) { const DataLayout &DL = ValCtx.DL; Value *Ptr = GEP->getPointerOperand(); - unsigned size = + unsigned Size = DL.getTypeAllocSize(Ptr->getType()->getPointerElementType()); - unsigned valSize = + unsigned ValSize = DL.getTypeAllocSize(GEP->getType()->getPointerElementType()); SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - unsigned offset = + unsigned Offset = DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); - if ((offset + valSize) > size) { + if ((Offset + ValSize) > Size) { ValCtx.EmitInstrError(GEP, ValidationRule::InstrInBoundsAccess); } } @@ -2978,16 +3050,16 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: { Value *Ptr = I.getOperand(AtomicRMWInst::getPointerOperandIndex()); - PointerType *ptrType = cast(Ptr->getType()); - Type *elType = ptrType->getElementType(); + PointerType *PtrType = cast(Ptr->getType()); + Type *ElType = PtrType->getElementType(); const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel(); - if ((elType->isIntegerTy(64)) && !pSM->IsSM66Plus()) + if ((ElType->isIntegerTy(64)) && !pSM->IsSM66Plus()) ValCtx.EmitInstrFormatError( &I, 
ValidationRule::SmOpcodeInInvalidFunction, {"64-bit atomic operations", "Shader Model 6.6+"}); - if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace && - ptrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) + if (PtrType->getAddressSpace() != DXIL::kTGSMAddrSpace && + PtrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) ValCtx.EmitInstrError( &I, ValidationRule::InstrAtomicOpNonGroupsharedOrRecord); @@ -3038,12 +3110,12 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { } } } - ValidateControlFlowHint(*b, ValCtx); + ValidateControlFlowHint(*B, ValCtx); } - ValidateMsIntrinsics(F, ValCtx, setMeshOutputCounts, getMeshPayload); + ValidateMsIntrinsics(F, ValCtx, SetMeshOutputCounts, GetMeshPayload); - ValidateAsIntrinsics(F, ValCtx, dispatchMesh); + ValidateAsIntrinsics(F, ValCtx, DispatchMesh); } static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { @@ -3051,39 +3123,39 @@ static void ValidateNodeInputRecord(Function *F, ValidationContext &ValCtx) { // to do here if (!ValCtx.DxilMod.HasDxilFunctionProps(F)) return; - auto &props = ValCtx.DxilMod.GetDxilFunctionProps(F); - if (!props.IsNode()) + auto &Props = ValCtx.DxilMod.GetDxilFunctionProps(F); + if (!Props.IsNode()) return; - if (props.InputNodes.size() > 1) { + if (Props.InputNodes.size() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::DeclMultipleNodeInputs, - {F->getName(), std::to_string(props.InputNodes.size())}); + {F->getName(), std::to_string(Props.InputNodes.size())}); } - for (auto &input : props.InputNodes) { - if (!input.Flags.RecordTypeMatchesLaunchType(props.Node.LaunchType)) { + for (auto &input : Props.InputNodes) { + if (!input.Flags.RecordTypeMatchesLaunchType(Props.Node.LaunchType)) { // We allow EmptyNodeInput here, as that may have been added implicitly // if there was no input specified if (input.Flags.IsEmptyInput()) continue; - llvm::StringRef validInputs = ""; - switch (props.Node.LaunchType) { + llvm::StringRef 
ValidInputs = ""; + switch (Props.Node.LaunchType) { case DXIL::NodeLaunchType::Broadcasting: - validInputs = "{RW}DispatchNodeInputRecord"; + ValidInputs = "{RW}DispatchNodeInputRecord"; break; case DXIL::NodeLaunchType::Coalescing: - validInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; + ValidInputs = "{RW}GroupNodeInputRecords or EmptyNodeInput"; break; case DXIL::NodeLaunchType::Thread: - validInputs = "{RW}ThreadNodeInputRecord"; + ValidInputs = "{RW}ThreadNodeInputRecord"; break; default: llvm_unreachable("invalid launch type"); } ValCtx.EmitFnFormatError( F, ValidationRule::DeclNodeLaunchInputType, - {ShaderModel::GetNodeLaunchTypeName(props.Node.LaunchType), - F->getName(), validInputs}); + {ShaderModel::GetNodeLaunchTypeName(Props.Node.LaunchType), + F->getName(), ValidInputs}); } } } @@ -3094,26 +3166,26 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (F.isIntrinsic() || IsDxilFunction(&F)) return; } else { - DXIL::ShaderKind shaderKind = DXIL::ShaderKind::Library; - bool isShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); - unsigned numUDTShaderArgs = 0; - if (isShader) { - shaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; - switch (shaderKind) { + DXIL::ShaderKind ShaderKind = DXIL::ShaderKind::Library; + bool IsShader = ValCtx.DxilMod.HasDxilFunctionProps(&F); + unsigned NumUDTShaderArgs = 0; + if (IsShader) { + ShaderKind = ValCtx.DxilMod.GetDxilFunctionProps(&F).shaderKind; + switch (ShaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - numUDTShaderArgs = 2; + NumUDTShaderArgs = 2; break; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - numUDTShaderArgs = 1; + NumUDTShaderArgs = 1; break; case DXIL::ShaderKind::Compute: { DxilModule &DM = ValCtx.DxilMod; if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); // Check that compute has no node metadata - if 
(entryProps.props.IsNode()) { + if (EntryProps.props.IsNode()) { ValCtx.EmitFnFormatError(&F, ValidationRule::MetaComputeWithNode, {F.getName()}); } @@ -3124,45 +3196,45 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { break; } } else { - isShader = ValCtx.DxilMod.IsPatchConstantShader(&F); + IsShader = ValCtx.DxilMod.IsPatchConstantShader(&F); } // Entry function should not have parameter. - if (isShader && 0 == numUDTShaderArgs && !F.arg_empty()) + if (IsShader && 0 == NumUDTShaderArgs && !F.arg_empty()) ValCtx.EmitFnFormatError(&F, ValidationRule::FlowFunctionCall, {F.getName()}); // Shader functions should return void. - if (isShader && !F.getReturnType()->isVoidTy()) + if (IsShader && !F.getReturnType()->isVoidTy()) ValCtx.EmitFnFormatError(&F, ValidationRule::DeclShaderReturnVoid, {F.getName()}); - auto ArgFormatError = [&](Function &F, Argument &arg, ValidationRule rule) { - if (arg.hasName()) - ValCtx.EmitFnFormatError(&F, rule, {arg.getName().str(), F.getName()}); + auto ArgFormatError = [&](Function &F, Argument &Arg, ValidationRule Rule) { + if (Arg.hasName()) + ValCtx.EmitFnFormatError(&F, Rule, {Arg.getName().str(), F.getName()}); else - ValCtx.EmitFnFormatError(&F, rule, - {std::to_string(arg.getArgNo()), F.getName()}); + ValCtx.EmitFnFormatError(&F, Rule, + {std::to_string(Arg.getArgNo()), F.getName()}); }; - unsigned numArgs = 0; - for (auto &arg : F.args()) { - Type *argTy = arg.getType(); - if (argTy->isPointerTy()) - argTy = argTy->getPointerElementType(); - - numArgs++; - if (numUDTShaderArgs) { - if (arg.getArgNo() >= numUDTShaderArgs) { - ArgFormatError(F, arg, ValidationRule::DeclExtraArgs); - } else if (!argTy->isStructTy()) { - switch (shaderKind) { + unsigned NumArgs = 0; + for (auto &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + if (ArgTy->isPointerTy()) + ArgTy = ArgTy->getPointerElementType(); + + NumArgs++; + if (NumUDTShaderArgs) { + if (Arg.getArgNo() >= NumUDTShaderArgs) { + ArgFormatError(F, Arg, 
ValidationRule::DeclExtraArgs); + } else if (!ArgTy->isStructTy()) { + switch (ShaderKind) { case DXIL::ShaderKind::Callable: - ArgFormatError(F, arg, ValidationRule::DeclParamStruct); + ArgFormatError(F, Arg, ValidationRule::DeclParamStruct); break; default: - ArgFormatError(F, arg, - arg.getArgNo() == 0 + ArgFormatError(F, Arg, + Arg.getArgNo() == 0 ? ValidationRule::DeclPayloadStruct : ValidationRule::DeclAttrStruct); } @@ -3170,24 +3242,24 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { continue; } - while (argTy->isArrayTy()) { - argTy = argTy->getArrayElementType(); + while (ArgTy->isArrayTy()) { + ArgTy = ArgTy->getArrayElementType(); } - if (argTy->isStructTy() && !ValCtx.isLibProfile) { - ArgFormatError(F, arg, ValidationRule::DeclFnFlattenParam); + if (ArgTy->isStructTy() && !ValCtx.isLibProfile) { + ArgFormatError(F, Arg, ValidationRule::DeclFnFlattenParam); break; } } - if (numArgs < numUDTShaderArgs && shaderKind != DXIL::ShaderKind::Node) { - StringRef argType[2] = { - shaderKind == DXIL::ShaderKind::Callable ? "params" : "payload", + if (NumArgs < NumUDTShaderArgs && ShaderKind != DXIL::ShaderKind::Node) { + StringRef ArgType[2] = { + ShaderKind == DXIL::ShaderKind::Callable ? 
"params" : "payload", "attributes"}; - for (unsigned i = numArgs; i < numUDTShaderArgs; i++) { + for (unsigned I = NumArgs; I < NumUDTShaderArgs; I++) { ValCtx.EmitFnFormatError( &F, ValidationRule::DeclShaderMissingArg, - {ShaderModel::GetKindName(shaderKind), F.getName(), argType[i]}); + {ShaderModel::GetKindName(ShaderKind), F.getName(), ArgType[I]}); } } @@ -3224,25 +3296,25 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { static void ValidateGlobalVariable(GlobalVariable &GV, ValidationContext &ValCtx) { - bool isInternalGV = + bool IsInternalGv = dxilutil::IsStaticGlobal(&GV) || dxilutil::IsSharedMemoryGlobal(&GV); if (ValCtx.isLibProfile) { - auto isCBufferGlobal = + auto IsCBufferGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isResourceGlobal = + auto IsResourceGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) return true; return false; }; - auto isSamplerGlobal = + auto IsSamplerGlobal = [&](const std::vector> &ResTab) -> bool { for (auto &Res : ResTab) if (Res->GetGlobalSymbol() == &GV) @@ -3250,32 +3322,32 @@ static void ValidateGlobalVariable(GlobalVariable &GV, return false; }; - bool isRes = isCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetUAVs()); - isRes |= isResourceGlobal(ValCtx.DxilMod.GetSRVs()); - isRes |= isSamplerGlobal(ValCtx.DxilMod.GetSamplers()); - isInternalGV |= isRes; + bool IsRes = IsCBufferGlobal(ValCtx.DxilMod.GetCBuffers()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetUAVs()); + IsRes |= IsResourceGlobal(ValCtx.DxilMod.GetSRVs()); + IsRes |= IsSamplerGlobal(ValCtx.DxilMod.GetSamplers()); + IsInternalGv |= IsRes; // Allow special dx.ishelper for library target if (GV.getName().compare(DXIL::kDxIsHelperGlobalName) == 0) { Type *Ty = GV.getType()->getPointerElementType(); if (Ty->isIntegerTy() && 
Ty->getScalarSizeInBits() == 32) { - isInternalGV = true; + IsInternalGv = true; } } } - if (!isInternalGV) { + if (!IsInternalGv) { if (!GV.user_empty()) { - bool hasInstructionUser = false; + bool HasInstructionUser = false; for (User *U : GV.users()) { if (isa(U)) { - hasInstructionUser = true; + HasInstructionUser = true; break; } } // External GV should not have instruction user. - if (hasInstructionUser) { + if (HasInstructionUser) { ValCtx.EmitGlobalVariableFormatError( &GV, ValidationRule::DeclNotUsedExternal, {GV.getName()}); } @@ -3298,14 +3370,14 @@ static void ValidateGlobalVariable(GlobalVariable &GV, } static void CollectFixAddressAccess(Value *V, - std::vector &fixAddrTGSMList) { + std::vector &FixAddrTGSMList) { for (User *U : V->users()) { if (GEPOperator *GEP = dyn_cast(U)) { if (isa(GEP) || GEP->hasAllConstantIndices()) { - CollectFixAddressAccess(GEP, fixAddrTGSMList); + CollectFixAddressAccess(GEP, FixAddrTGSMList); } } else if (StoreInst *SI = dyn_cast(U)) { - fixAddrTGSMList.emplace_back(SI); + FixAddrTGSMList.emplace_back(SI); } } } @@ -3315,16 +3387,16 @@ static bool IsDivergent(Value *V) { return false; } -static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, +static void ValidateTGSMRaceCondition(std::vector &FixAddrTGSMList, ValidationContext &ValCtx) { - std::unordered_set fixAddrTGSMFuncSet; - for (StoreInst *I : fixAddrTGSMList) { + std::unordered_set FixAddrTGSMFuncSet; + for (StoreInst *I : FixAddrTGSMList) { BasicBlock *BB = I->getParent(); - fixAddrTGSMFuncSet.insert(BB->getParent()); + FixAddrTGSMFuncSet.insert(BB->getParent()); } for (auto &F : ValCtx.DxilMod.GetModule()->functions()) { - if (F.isDeclaration() || !fixAddrTGSMFuncSet.count(&F)) + if (F.isDeclaration() || !FixAddrTGSMFuncSet.count(&F)) continue; PostDominatorTree PDT; @@ -3332,7 +3404,7 @@ static void ValidateTGSMRaceCondition(std::vector &fixAddrTGSMList, BasicBlock *Entry = &F.getEntryBlock(); - for (StoreInst *SI : fixAddrTGSMList) { + for 
(StoreInst *SI : FixAddrTGSMList) { BasicBlock *BB = SI->getParent(); if (BB->getParent() == &F) { if (PDT.dominates(BB, Entry)) { @@ -3351,7 +3423,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib(); unsigned TGSMSize = 0; - std::vector fixAddrTGSMList; + std::vector FixAddrTGSMList; const DataLayout &DL = M.GetModule()->getDataLayout(); for (GlobalVariable &GV : M.GetModule()->globals()) { ValidateGlobalVariable(GV, ValCtx); @@ -3366,9 +3438,9 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { if (Instruction *I = dyn_cast(U)) { llvm::Function *F = I->getParent()->getParent(); if (M.HasDxilEntryProps(F)) { - DxilFunctionProps &props = M.GetDxilEntryProps(F).props; - if (!props.IsCS() && !props.IsAS() && !props.IsMS() && - !props.IsNode()) { + DxilFunctionProps &Props = M.GetDxilEntryProps(F).props; + if (!Props.IsCS() && !Props.IsAS() && !Props.IsMS() && + !Props.IsNode()) { ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported, {"from non-compute entry points"}); @@ -3378,7 +3450,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { } } TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType()); - CollectFixAddressAccess(&GV, fixAddrTGSMList); + CollectFixAddressAccess(&GV, FixAddrTGSMList); } } @@ -3402,8 +3474,8 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { GV, Rule, {std::to_string(TGSMSize), std::to_string(MaxSize)}); } - if (!fixAddrTGSMList.empty()) { - ValidateTGSMRaceCondition(fixAddrTGSMList, ValCtx); + if (!FixAddrTGSMList.empty()) { + ValidateTGSMRaceCondition(FixAddrTGSMList, ValCtx); } } @@ -3416,20 +3488,20 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if 
(GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { - unsigned curMajor, curMinor; - GetValidationVersion(&curMajor, &curMinor); + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { + unsigned CurMajor, CurMinor; + GetValidationVersion(&CurMajor, &CurMinor); // This will need to be updated as major/minor versions evolve, // depending on the degree of compat across versions. - if (majorVer == curMajor && minorVer <= curMinor) { + if (MajorVer == CurMajor && MinorVer <= CurMinor) { return; } else { ValCtx.EmitFormatError( ValidationRule::MetaVersionSupported, - {"Validator", std::to_string(majorVer), std::to_string(minorVer), - std::to_string(curMajor), std::to_string(curMinor)}); + {"Validator", std::to_string(MajorVer), std::to_string(MinorVer), + std::to_string(CurMajor), std::to_string(CurMinor)}); return; } } @@ -3447,19 +3519,19 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) { if (pNode->getNumOperands() == 1) { MDTuple *pVerValues = dyn_cast(pNode->getOperand(0)); if (pVerValues != nullptr && pVerValues->getNumOperands() == 2) { - uint64_t majorVer, minorVer; - if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &majorVer) && - GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) { + uint64_t MajorVer, MinorVer; + if (GetNodeOperandAsInt(ValCtx, pVerValues, 0, &MajorVer) && + GetNodeOperandAsInt(ValCtx, pVerValues, 1, &MinorVer)) { // This will need to be updated as dxil major/minor versions evolve, // depending on the degree of compat across versions. 
- if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) && - (majorVer == ValCtx.m_DxilMajor && - minorVer == ValCtx.m_DxilMinor)) { + if ((MajorVer == DXIL::kDxilMajor && MinorVer <= DXIL::kDxilMinor) && + (MajorVer == ValCtx.m_DxilMajor && + MinorVer == ValCtx.m_DxilMinor)) { return; } else { ValCtx.EmitFormatError(ValidationRule::MetaVersionSupported, - {"Dxil", std::to_string(majorVer), - std::to_string(minorVer), + {"Dxil", std::to_string(MajorVer), + std::to_string(MinorVer), std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)}); return; @@ -3477,16 +3549,16 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { NamedMDNode *TA = pModule->getNamedMetadata("dx.typeAnnotations"); if (TA == nullptr) return; - for (unsigned i = 0, end = TA->getNumOperands(); i < end; ++i) { - MDTuple *TANode = dyn_cast(TA->getOperand(i)); + for (unsigned I = 0, End = TA->getNumOperands(); I < End; ++I) { + MDTuple *TANode = dyn_cast(TA->getOperand(I)); if (TANode->getNumOperands() < 3) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } - ConstantInt *tag = mdconst::extract(TANode->getOperand(0)); - uint64_t tagValue = tag->getZExtValue(); - if (tagValue != DxilMDHelper::kDxilTypeSystemStructTag && - tagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { + ConstantInt *Tag = mdconst::extract(TANode->getOperand(0)); + uint64_t TagValue = Tag->getZExtValue(); + if (TagValue != DxilMDHelper::kDxilTypeSystemStructTag && + TagValue != DxilMDHelper::kDxilTypeSystemFunctionTag) { ValCtx.EmitMetaError(TANode, ValidationRule::MetaWellFormed); return; } @@ -3495,11 +3567,11 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { } static void ValidateBitcode(ValidationContext &ValCtx) { - std::string diagStr; - raw_string_ostream diagStream(diagStr); - if (llvm::verifyModule(ValCtx.M, &diagStream)) { + std::string DiagStr; + raw_string_ostream DiagStream(DiagStr); + if (llvm::verifyModule(ValCtx.M, &DiagStream)) { 
ValCtx.EmitError(ValidationRule::BitcodeValid); - dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), diagStream.str()); + dxilutil::EmitErrorOnContext(ValCtx.M.getContext(), DiagStream.str()); } } @@ -3513,18 +3585,18 @@ static void ValidateWaveSize(ValidationContext &ValCtx, if (!EPs) return; - for (unsigned i = 0, end = EPs->getNumOperands(); i < end; ++i) { - MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(i)); + for (unsigned I = 0, End = EPs->getNumOperands(); I < End; ++I) { + MDTuple *EPNodeRef = dyn_cast(EPs->getOperand(I)); if (EPNodeRef->getNumOperands() < 5) { ValCtx.EmitMetaError(EPNodeRef, ValidationRule::MetaWellFormed); return; } // get access to the digit that represents the metadata number that // would store entry properties - const llvm::MDOperand &mOp = + const llvm::MDOperand &MOp = EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1); // the final operand to the entry points tuple should be a tuple. - if (mOp == nullptr || (mOp.get())->getMetadataID() != Metadata::MDTupleKind) + if (MOp == nullptr || (MOp.get())->getMetadataID() != Metadata::MDTupleKind) continue; // get access to the node that stores entry properties @@ -3532,29 +3604,29 @@ static void ValidateWaveSize(ValidationContext &ValCtx, EPNodeRef->getOperand(EPNodeRef->getNumOperands() - 1)); // find any incompatible tags inside the entry properties // increment j by 2 to only analyze tags, not values - bool foundTag = false; - for (unsigned j = 0, end2 = EPropNode->getNumOperands(); j < end2; j += 2) { - const MDOperand &propertyTagOp = EPropNode->getOperand(j); + bool FoundTag = false; + for (unsigned J = 0, End2 = EPropNode->getNumOperands(); J < End2; J += 2) { + const MDOperand &PropertyTagOp = EPropNode->getOperand(J); // note, we are only looking for tags, which will be a constant // integer - DXASSERT(!(propertyTagOp == nullptr || - (propertyTagOp.get())->getMetadataID() != + DXASSERT(!(PropertyTagOp == nullptr || + (PropertyTagOp.get())->getMetadataID() != 
Metadata::ConstantAsMetadataKind), "tag operand should be a constant integer."); - ConstantInt *tag = mdconst::extract(propertyTagOp); - uint64_t tagValue = tag->getZExtValue(); + ConstantInt *Tag = mdconst::extract(PropertyTagOp); + uint64_t TagValue = Tag->getZExtValue(); // legacy wavesize is only supported between 6.6 and 6.7, so we // should fail if we find the ranged wave size metadata tag - if (tagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { + if (TagValue == DxilMDHelper::kDxilRangedWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; + FoundTag = true; if (SM->IsSM66Plus() && !SM->IsSM68Plus()) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeRangeNeedsSM68Plus, @@ -3563,36 +3635,36 @@ static void ValidateWaveSize(ValidationContext &ValCtx, } // get the metadata that contains the // parameters to the wavesize attribute - MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 3) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeRangeExpectsThreeParams, {}); return; } - for (int k = 0; k < 3; k++) { - const MDOperand ¶m = WaveTuple->getOperand(k); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + for (int K = 0; K < 3; K++) { + const MDOperand &Param = WaveTuple->getOperand(K); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; } } - } else if (tagValue == DxilMDHelper::kDxilWaveSizeTag) { + } else if (TagValue == DxilMDHelper::kDxilWaveSizeTag) { // if this tag is already present in the // current entry point, emit an error - if (foundTag) { + if (FoundTag) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeTagDuplicate, {}); return; } - foundTag = true; - 
MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(j + 1)); + FoundTag = true; + MDTuple *WaveTuple = dyn_cast(EPropNode->getOperand(J + 1)); if (WaveTuple->getNumOperands() != 1) { ValCtx.EmitFormatError(ValidationRule::SmWaveSizeExpectsOneParam, {}); return; } - const MDOperand ¶m = WaveTuple->getOperand(0); - if (param->getMetadataID() != Metadata::ConstantAsMetadataKind) { + const MDOperand &Param = WaveTuple->getOperand(0); + if (Param->getMetadataID() != Metadata::ConstantAsMetadataKind) { ValCtx.EmitFormatError( ValidationRule::SmWaveSizeNeedsConstantOperands, {}); return; @@ -3613,9 +3685,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { ValidateDxilVersion(ValCtx); Module *pModule = &ValCtx.M; - const std::string &target = pModule->getTargetTriple(); - if (target != "dxil-ms-dx") { - ValCtx.EmitFormatError(ValidationRule::MetaTarget, {target}); + const std::string &Target = pModule->getTargetTriple(); + if (Target != "dxil-ms-dx") { + ValCtx.EmitFormatError(ValidationRule::MetaTarget, {Target}); } // The llvm.dbg.(cu/contents/defines/mainFileName/arg) named metadata nodes @@ -3623,9 +3695,9 @@ static void ValidateMetadata(ValidationContext &ValCtx) { // llvm.bitsets is also disallowed. // // These are verified in lib/IR/Verifier.cpp. 
- StringMap llvmNamedMeta; - llvmNamedMeta["llvm.ident"]; - llvmNamedMeta["llvm.module.flags"]; + StringMap LlvmNamedMeta; + LlvmNamedMeta["llvm.ident"]; + LlvmNamedMeta["llvm.module.flags"]; for (auto &NamedMetaNode : pModule->named_metadata()) { if (!DxilModule::IsKnownNamedMetaData(NamedMetaNode)) { @@ -3633,7 +3705,7 @@ static void ValidateMetadata(ValidationContext &ValCtx) { if (!name.startswith_lower("llvm.")) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } else { - if (llvmNamedMeta.count(name) == 0) { + if (LlvmNamedMeta.count(name) == 0) { ValCtx.EmitFormatError(ValidationRule::MetaKnown, {name.str()}); } } @@ -3666,35 +3738,35 @@ static void ValidateMetadata(ValidationContext &ValCtx) { } static void ValidateResourceOverlap( - hlsl::DxilResourceBase &res, - SpacesAllocator &spaceAllocator, + hlsl::DxilResourceBase &Res, + SpacesAllocator &SpaceAllocator, ValidationContext &ValCtx) { - unsigned base = res.GetLowerBound(); - if (ValCtx.isLibProfile && !res.IsAllocated()) { + unsigned Base = Res.GetLowerBound(); + if (ValCtx.isLibProfile && !Res.IsAllocated()) { // Skip unallocated resource for library. 
return; } - unsigned size = res.GetRangeSize(); - unsigned space = res.GetSpaceID(); + unsigned Size = Res.GetRangeSize(); + unsigned Space = Res.GetSpaceID(); - auto &allocator = spaceAllocator.Get(space); - unsigned end = base + size - 1; + auto &Allocator = SpaceAllocator.Get(Space); + unsigned End = Base + Size - 1; // unbounded - if (end < base) - end = size; - const DxilResourceBase *conflictRes = allocator.Insert(&res, base, end); - if (conflictRes) { + if (End < Base) + End = Size; + const DxilResourceBase *ConflictRes = Allocator.Insert(&Res, Base, End); + if (ConflictRes) { ValCtx.EmitFormatError( ValidationRule::SmResourceRangeOverlap, - {ValCtx.GetResourceName(&res), std::to_string(base), - std::to_string(size), std::to_string(conflictRes->GetLowerBound()), - std::to_string(conflictRes->GetRangeSize()), std::to_string(space)}); + {ValCtx.GetResourceName(&Res), std::to_string(Base), + std::to_string(Size), std::to_string(ConflictRes->GetLowerBound()), + std::to_string(ConflictRes->GetRangeSize()), std::to_string(Space)}); } } -static void ValidateResource(hlsl::DxilResource &res, +static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { - switch (res.GetKind()) { + switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::TBuffer: @@ -3706,8 +3778,8 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ResourceKind::Texture3D: case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - if (res.GetSampleCount() > 0) { - ValCtx.EmitResourceError(&res, ValidationRule::SmSampleCountOnlyOn2DMS); + if (Res.GetSampleCount() > 0) { + ValCtx.EmitResourceError(&Res, ValidationRule::SmSampleCountOnlyOn2DMS); } break; case DXIL::ResourceKind::Texture2DMS: @@ -3718,16 +3790,16 @@ static void ValidateResource(hlsl::DxilResource &res, break; case DXIL::ResourceKind::FeedbackTexture2D: case DXIL::ResourceKind::FeedbackTexture2DArray: - if 
(res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) - ValCtx.EmitResourceError(&res, + if (Res.GetSamplerFeedbackType() >= DXIL::SamplerFeedbackType::LastEntry) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidSamplerFeedbackType); break; default: - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceKind); + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceKind); break; } - switch (res.GetCompType().GetKind()) { + switch (Res.GetCompType().GetKind()) { case DXIL::ComponentType::F32: case DXIL::ComponentType::SNormF32: case DXIL::ComponentType::UNormF32: @@ -3741,266 +3813,266 @@ static void ValidateResource(hlsl::DxilResource &res, case DXIL::ComponentType::U16: break; default: - if (!res.IsStructuredBuffer() && !res.IsRawBuffer() && - !res.IsFeedbackTexture()) - ValCtx.EmitResourceError(&res, ValidationRule::SmInvalidResourceCompType); + if (!Res.IsStructuredBuffer() && !Res.IsRawBuffer() && + !Res.IsFeedbackTexture()) + ValCtx.EmitResourceError(&Res, ValidationRule::SmInvalidResourceCompType); break; } - if (res.IsStructuredBuffer()) { - unsigned stride = res.GetElementStride(); - bool alignedTo4Bytes = (stride & 3) == 0; - if (!alignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { + if (Res.IsStructuredBuffer()) { + unsigned Stride = Res.GetElementStride(); + bool AlignedTo4Bytes = (Stride & 3) == 0; + if (!AlignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignment, - {std::to_string(4), std::to_string(stride)}); + &Res, ValidationRule::MetaStructBufAlignment, + {std::to_string(4), std::to_string(Stride)}); } - if (stride > DXIL::kMaxStructBufferStride) { + if (Stride > DXIL::kMaxStructBufferStride) { ValCtx.EmitResourceFormatError( - &res, ValidationRule::MetaStructBufAlignmentOutOfBound, + &Res, ValidationRule::MetaStructBufAlignmentOutOfBound, {std::to_string(DXIL::kMaxStructBufferStride), - 
std::to_string(stride)}); + std::to_string(Stride)}); } } - if (res.IsAnyTexture() || res.IsTypedBuffer()) { - Type *RetTy = res.GetRetType(); - unsigned size = + if (Res.IsAnyTexture() || Res.IsTypedBuffer()) { + Type *RetTy = Res.GetRetType(); + unsigned Size = ValCtx.DxilMod.GetModule()->getDataLayout().getTypeAllocSize(RetTy); - if (size > 4 * 4) { - ValCtx.EmitResourceError(&res, ValidationRule::MetaTextureType); + if (Size > 4 * 4) { + ValCtx.EmitResourceError(&Res, ValidationRule::MetaTextureType); } } } static void CollectCBufferRanges( - DxilStructAnnotation *annotation, - SpanAllocator &constAllocator, unsigned base, - DxilTypeSystem &typeSys, StringRef cbName, ValidationContext &ValCtx) { - DXASSERT(((base + 15) & ~(0xf)) == base, + DxilStructAnnotation *Annotation, + SpanAllocator &ConstAllocator, unsigned Base, + DxilTypeSystem &TypeSys, StringRef CbName, ValidationContext &ValCtx) { + DXASSERT(((Base + 15) & ~(0xf)) == Base, "otherwise, base for struct is not aligned"); - unsigned cbSize = annotation->GetCBufferSize(); + unsigned CbSize = Annotation->GetCBufferSize(); - const StructType *ST = annotation->GetStructType(); + const StructType *ST = Annotation->GetStructType(); - for (int i = annotation->GetNumFields() - 1; i >= 0; i--) { - DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(i); - Type *EltTy = ST->getElementType(i); + for (int I = Annotation->GetNumFields() - 1; I >= 0; I--) { + DxilFieldAnnotation &FieldAnnotation = Annotation->GetFieldAnnotation(I); + Type *EltTy = ST->getElementType(I); - unsigned offset = fieldAnnotation.GetCBufferOffset(); + unsigned Offset = FieldAnnotation.GetCBufferOffset(); unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( - fieldAnnotation, EltTy, typeSys); + FieldAnnotation, EltTy, TypeSys); - bool bOutOfBound = false; + bool IsOutOfBound = false; if (!EltTy->isAggregateType()) { - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { - if 
(constAllocator.Insert(&fieldAnnotation, base + offset, - base + offset + EltSize - 1)) { + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { + if (ConstAllocator.Insert(&FieldAnnotation, Base + Offset, + Base + Offset + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } else if (isa(EltTy)) { - if (((offset + 15) & ~(0xf)) != offset) { + if (((Offset + 15) & ~(0xf)) != Offset) { ValCtx.EmitFormatError(ValidationRule::SmCBufferArrayOffsetAlignment, - {cbName, std::to_string(offset)}); + {CbName, std::to_string(Offset)}); continue; } - unsigned arrayCount = 1; + unsigned ArrayCount = 1; while (isa(EltTy)) { - arrayCount *= EltTy->getArrayNumElements(); + ArrayCount *= EltTy->getArrayNumElements(); EltTy = EltTy->getArrayElementType(); } DxilStructAnnotation *EltAnnotation = nullptr; if (StructType *EltST = dyn_cast(EltTy)) - EltAnnotation = typeSys.GetStructAnnotation(EltST); + EltAnnotation = TypeSys.GetStructAnnotation(EltST); - unsigned alignedEltSize = ((EltSize + 15) & ~(0xf)); - unsigned arraySize = ((arrayCount - 1) * alignedEltSize) + EltSize; - bOutOfBound = (offset + arraySize) > cbSize; + unsigned AlignedEltSize = ((EltSize + 15) & ~(0xf)); + unsigned ArraySize = ((ArrayCount - 1) * AlignedEltSize) + EltSize; + IsOutOfBound = (Offset + ArraySize) > CbSize; - if (!bOutOfBound) { + if (!IsOutOfBound) { // If we didn't care about gaps where elements could be placed with user // offsets, we could: recurse once if EltAnnotation, then allocate the - // rest if arrayCount > 1 + // rest if ArrayCount > 1 - unsigned arrayBase = base + offset; + unsigned ArrayBase = Base + Offset; if (!EltAnnotation) { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, arrayBase, - arrayBase + arraySize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, ArrayBase, + ArrayBase + ArraySize - 1)) { 
ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(arrayBase)}); + {CbName, std::to_string(ArrayBase)}); } } else { - for (unsigned idx = 0; idx < arrayCount; idx++) { - CollectCBufferRanges(EltAnnotation, constAllocator, arrayBase, - typeSys, cbName, ValCtx); - arrayBase += alignedEltSize; + for (unsigned Idx = 0; Idx < ArrayCount; Idx++) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, ArrayBase, + TypeSys, CbName, ValCtx); + ArrayBase += AlignedEltSize; } } } } else { StructType *EltST = cast(EltTy); - unsigned structBase = base + offset; - bOutOfBound = (offset + EltSize) > cbSize; - if (!bOutOfBound) { + unsigned StructBase = Base + Offset; + IsOutOfBound = (Offset + EltSize) > CbSize; + if (!IsOutOfBound) { if (DxilStructAnnotation *EltAnnotation = - typeSys.GetStructAnnotation(EltST)) { - CollectCBufferRanges(EltAnnotation, constAllocator, structBase, - typeSys, cbName, ValCtx); + TypeSys.GetStructAnnotation(EltST)) { + CollectCBufferRanges(EltAnnotation, ConstAllocator, StructBase, + TypeSys, CbName, ValCtx); } else { if (EltSize > 0 && - nullptr != constAllocator.Insert(&fieldAnnotation, structBase, - structBase + EltSize - 1)) { + nullptr != ConstAllocator.Insert(&FieldAnnotation, StructBase, + StructBase + EltSize - 1)) { ValCtx.EmitFormatError(ValidationRule::SmCBufferOffsetOverlap, - {cbName, std::to_string(structBase)}); + {CbName, std::to_string(StructBase)}); } } } } - if (bOutOfBound) { + if (IsOutOfBound) { ValCtx.EmitFormatError(ValidationRule::SmCBufferElementOverflow, - {cbName, std::to_string(base + offset)}); + {CbName, std::to_string(Base + Offset)}); } } } -static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) { - Type *Ty = cb.GetHLSLType()->getPointerElementType(); - if (cb.GetRangeSize() != 1 || Ty->isArrayTy()) { +static void ValidateCBuffer(DxilCBuffer &Cb, ValidationContext &ValCtx) { + Type *Ty = Cb.GetHLSLType()->getPointerElementType(); + if (Cb.GetRangeSize() != 1 || 
Ty->isArrayTy()) { Ty = Ty->getArrayElementType(); } if (!isa(Ty)) { - ValCtx.EmitResourceError(&cb, + ValCtx.EmitResourceError(&Cb, ValidationRule::SmCBufferTemplateTypeMustBeStruct); return; } - if (cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { - ValCtx.EmitResourceFormatError(&cb, ValidationRule::SmCBufferSize, - {std::to_string(cb.GetSize())}); + if (Cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) { + ValCtx.EmitResourceFormatError(&Cb, ValidationRule::SmCBufferSize, + {std::to_string(Cb.GetSize())}); return; } StructType *ST = cast(Ty); - DxilTypeSystem &typeSys = ValCtx.DxilMod.GetTypeSystem(); - DxilStructAnnotation *annotation = typeSys.GetStructAnnotation(ST); - if (!annotation) + DxilTypeSystem &TypeSys = ValCtx.DxilMod.GetTypeSystem(); + DxilStructAnnotation *Annotation = TypeSys.GetStructAnnotation(ST); + if (!Annotation) return; // Collect constant ranges. - std::vector> constRanges; - SpanAllocator constAllocator( + std::vector> ConstRanges; + SpanAllocator ConstAllocator( 0, // 4096 * 16 bytes. 
DXIL::kMaxCBufferSize << 4); - CollectCBufferRanges(annotation, constAllocator, 0, typeSys, - ValCtx.GetResourceName(&cb), ValCtx); + CollectCBufferRanges(Annotation, ConstAllocator, 0, TypeSys, + ValCtx.GetResourceName(&Cb), ValCtx); } static void ValidateResources(ValidationContext &ValCtx) { - const vector> &uavs = ValCtx.DxilMod.GetUAVs(); - SpacesAllocator uavAllocator; + const vector> &Uavs = ValCtx.DxilMod.GetUAVs(); + SpacesAllocator UavAllocator; - for (auto &uav : uavs) { - if (uav->IsROV()) { + for (auto &Uav : Uavs) { + if (Uav->IsROV()) { if (!ValCtx.DxilMod.GetShaderModel()->IsPS() && !ValCtx.isLibProfile) { - ValCtx.EmitResourceError(uav.get(), ValidationRule::SmROVOnlyInPS); + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmROVOnlyInPS); } } - switch (uav->GetKind()) { + switch (Uav->GetKind()) { case DXIL::ResourceKind::TextureCube: case DXIL::ResourceKind::TextureCubeArray: - ValCtx.EmitResourceError(uav.get(), + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmInvalidTextureKindOnUAV); break; default: break; } - if (uav->HasCounter() && !uav->IsStructuredBuffer()) { - ValCtx.EmitResourceError(uav.get(), + if (Uav->HasCounter() && !Uav->IsStructuredBuffer()) { + ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (uav->HasCounter() && uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(uav.get(), + if (Uav->HasCounter() && Uav->IsGloballyCoherent()) + ValCtx.EmitResourceFormatError(Uav.get(), ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(uav.get())}); + {ValCtx.GetResourceName(Uav.get())}); - ValidateResource(*uav, ValCtx); - ValidateResourceOverlap(*uav, uavAllocator, ValCtx); + ValidateResource(*Uav, ValCtx); + ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); } - SpacesAllocator srvAllocator; - const vector> &srvs = ValCtx.DxilMod.GetSRVs(); - for (auto &srv : srvs) { + SpacesAllocator SrvAllocator; + const vector> &Srvs = ValCtx.DxilMod.GetSRVs(); + for (auto &srv : 
Srvs) { ValidateResource(*srv, ValCtx); - ValidateResourceOverlap(*srv, srvAllocator, ValCtx); + ValidateResourceOverlap(*srv, SrvAllocator, ValCtx); } - hlsl::DxilResourceBase *pNonDense; - if (!AreDxilResourcesDense(&ValCtx.M, &pNonDense)) { - ValCtx.EmitResourceError(pNonDense, ValidationRule::MetaDenseResIDs); + hlsl::DxilResourceBase *NonDenseRes; + if (!AreDxilResourcesDense(&ValCtx.M, &NonDenseRes)) { + ValCtx.EmitResourceError(NonDenseRes, ValidationRule::MetaDenseResIDs); } - SpacesAllocator samplerAllocator; + SpacesAllocator SamplerAllocator; for (auto &sampler : ValCtx.DxilMod.GetSamplers()) { if (sampler->GetSamplerKind() == DXIL::SamplerKind::Invalid) { ValCtx.EmitResourceError(sampler.get(), ValidationRule::MetaValidSamplerMode); } - ValidateResourceOverlap(*sampler, samplerAllocator, ValCtx); + ValidateResourceOverlap(*sampler, SamplerAllocator, ValCtx); } - SpacesAllocator cbufferAllocator; + SpacesAllocator CbufferAllocator; for (auto &cbuffer : ValCtx.DxilMod.GetCBuffers()) { ValidateCBuffer(*cbuffer, ValCtx); - ValidateResourceOverlap(*cbuffer, cbufferAllocator, ValCtx); + ValidateResourceOverlap(*cbuffer, CbufferAllocator, ValCtx); } } static void ValidateShaderFlags(ValidationContext &ValCtx) { - ShaderFlags calcFlags; - ValCtx.DxilMod.CollectShaderFlagsForModule(calcFlags); + ShaderFlags CalcFlags; + ValCtx.DxilMod.CollectShaderFlagsForModule(CalcFlags); // Special case for validator version prior to 1.8. // If DXR 1.1 flag is set, but our computed flags do not have this set, then // this is due to prior versions setting the flag based on DXR 1.1 subobjects, // which are gone by this point. Set the flag and the rest should match. 
- unsigned valMajor, valMinor; - ValCtx.DxilMod.GetValidatorVersion(valMajor, valMinor); - if (DXIL::CompareVersions(valMajor, valMinor, 1, 5) >= 0 && - DXIL::CompareVersions(valMajor, valMinor, 1, 8) < 0 && + unsigned ValMajor, ValMinor; + ValCtx.DxilMod.GetValidatorVersion(ValMajor, ValMinor); + if (DXIL::CompareVersions(ValMajor, ValMinor, 1, 5) >= 0 && + DXIL::CompareVersions(ValMajor, ValMinor, 1, 8) < 0 && ValCtx.DxilMod.m_ShaderFlags.GetRaytracingTier1_1() && - !calcFlags.GetRaytracingTier1_1()) { - calcFlags.SetRaytracingTier1_1(true); + !CalcFlags.GetRaytracingTier1_1()) { + CalcFlags.SetRaytracingTier1_1(true); } - const uint64_t mask = ShaderFlags::GetShaderFlagsRawForCollection(); - uint64_t declaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); - uint64_t calcFlagsRaw = calcFlags.GetShaderFlagsRaw(); + const uint64_t Mask = ShaderFlags::GetShaderFlagsRawForCollection(); + uint64_t DeclaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw(); + uint64_t CalcFlagsRaw = CalcFlags.GetShaderFlagsRaw(); - declaredFlagsRaw &= mask; - calcFlagsRaw &= mask; + DeclaredFlagsRaw &= Mask; + CalcFlagsRaw &= Mask; - if (declaredFlagsRaw == calcFlagsRaw) { + if (DeclaredFlagsRaw == CalcFlagsRaw) { return; } ValCtx.EmitError(ValidationRule::MetaFlagsUsage); dxilutil::EmitNoteOnContext(ValCtx.M.getContext(), Twine("Flags declared=") + - Twine(declaredFlagsRaw) + Twine(", actual=") + - Twine(calcFlagsRaw)); + Twine(DeclaredFlagsRaw) + Twine(", actual=") + + Twine(CalcFlagsRaw)); } static void ValidateSignatureElement(DxilSignatureElement &SE, ValidationContext &ValCtx) { - DXIL::SemanticKind semanticKind = SE.GetSemantic()->GetKind(); - CompType::Kind compKind = SE.GetCompType().GetKind(); + DXIL::SemanticKind SemanticKind = SE.GetSemantic()->GetKind(); + CompType::Kind CompKind = SE.GetCompType().GetKind(); DXIL::InterpolationMode Mode = SE.GetInterpolationMode()->GetKind(); StringRef Name = SE.GetName(); @@ -4008,86 +4080,86 @@ static void 
ValidateSignatureElement(DxilSignatureElement &SE, ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSemanticLen); } - if (semanticKind > DXIL::SemanticKind::Arbitrary && - semanticKind < DXIL::SemanticKind::Invalid) { - if (semanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { + if (SemanticKind > DXIL::SemanticKind::Arbitrary && + SemanticKind < DXIL::SemanticKind::Invalid) { + if (SemanticKind != Semantic::GetByName(SE.GetName())->GetKind()) { ValCtx.EmitFormatError(ValidationRule::MetaSemaKindMatchesName, {SE.GetName(), SE.GetSemantic()->GetName()}); } } - unsigned compWidth = 0; - bool compFloat = false; - bool compInt = false; - bool compBool = false; + unsigned CompWidth = 0; + bool CompFloat = false; + bool CompInt = false; + bool CompBool = false; - switch (compKind) { + switch (CompKind) { case CompType::Kind::U64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; case CompType::Kind::I64: - compWidth = 64; - compInt = true; + CompWidth = 64; + CompInt = true; break; // These should be translated for signatures: // case CompType::Kind::PackedS8x32: // case CompType::Kind::PackedU8x32: case CompType::Kind::U32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::I32: - compWidth = 32; - compInt = true; + CompWidth = 32; + CompInt = true; break; case CompType::Kind::U16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I16: - compWidth = 16; - compInt = true; + CompWidth = 16; + CompInt = true; break; case CompType::Kind::I1: - compWidth = 1; - compBool = true; + CompWidth = 1; + CompBool = true; break; case CompType::Kind::F64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::F32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::F16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; 
break; case CompType::Kind::SNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::SNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::SNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::UNormF64: - compWidth = 64; - compFloat = true; + CompWidth = 64; + CompFloat = true; break; case CompType::Kind::UNormF32: - compWidth = 32; - compFloat = true; + CompWidth = 32; + CompFloat = true; break; case CompType::Kind::UNormF16: - compWidth = 16; - compFloat = true; + CompWidth = 16; + CompFloat = true; break; case CompType::Kind::Invalid: default: @@ -4096,7 +4168,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; } - if (compInt || compBool) { + if (CompInt || CompBool) { switch (Mode) { case DXIL::InterpolationMode::Linear: case DXIL::InterpolationMode::LinearCentroid: @@ -4113,91 +4185,91 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } // Elements that should not appear in the Dxil signature: - bool bAllowedInSig = true; - bool bShouldBeAllocated = true; + bool AllowedInSig = true; + bool ShouldBeAllocated = true; switch (SE.GetInterpretation()) { case DXIL::SemanticInterpretationKind::NA: case DXIL::SemanticInterpretationKind::NotInSig: case DXIL::SemanticInterpretationKind::Invalid: - bAllowedInSig = false; + AllowedInSig = false; LLVM_FALLTHROUGH; case DXIL::SemanticInterpretationKind::NotPacked: case DXIL::SemanticInterpretationKind::Shadow: - bShouldBeAllocated = false; + ShouldBeAllocated = false; break; default: break; } - const char *inputOutput = nullptr; + const char *InputOutput = nullptr; if (SE.IsInput()) - inputOutput = "Input"; + InputOutput = "Input"; else if (SE.IsOutput()) - inputOutput = "Output"; + InputOutput = "Output"; else - inputOutput = "PatchConstant"; + InputOutput = "PatchConstant"; - if (!bAllowedInSig) { + if 
(!AllowedInSig) { ValCtx.EmitFormatError(ValidationRule::SmSemantic, {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName(), - inputOutput}); - } else if (bShouldBeAllocated && !SE.IsAllocated()) { + InputOutput}); + } else if (ShouldBeAllocated && !SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated, - {inputOutput, SE.GetName()}); - } else if (!bShouldBeAllocated && SE.IsAllocated()) { + {InputOutput, SE.GetName()}); + } else if (!ShouldBeAllocated && SE.IsAllocated()) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldNotBeAllocated, - {inputOutput, SE.GetName()}); + {InputOutput, SE.GetName()}); } - bool bIsClipCull = false; - bool bIsTessfactor = false; - bool bIsBarycentric = false; + bool IsClipCull = false; + bool IsTessfactor = false; + bool IsBarycentric = false; - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Depth: case DXIL::SemanticKind::DepthGreaterEqual: case DXIL::SemanticKind::DepthLessEqual: - if (!compFloat || compWidth > 32 || SE.GetCols() != 1) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } break; case DXIL::SemanticKind::Coverage: - DXASSERT(!SE.IsInput() || !bAllowedInSig, + DXASSERT(!SE.IsInput() || !AllowedInSig, "else internal inconsistency between semantic interpretation " "table and validation code"); LLVM_FALLTHROUGH; case DXIL::SemanticKind::InnerCoverage: case DXIL::SemanticKind::OutputControlPointID: - if (compKind != CompType::Kind::U32 || SE.GetCols() != 1) { + if (CompKind != CompType::Kind::U32 || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::Position: - if (!compFloat || compWidth > 32 || SE.GetCols() != 4) { + if (!CompFloat || CompWidth > 32 || SE.GetCols() != 4) { 
ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float4"}); } break; case DXIL::SemanticKind::Target: - if (compWidth > 32) { + if (CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float/int/uint"}); } break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: - bIsClipCull = true; - if (!compFloat || compWidth > 32) { + IsClipCull = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } // NOTE: clip cull distance size is checked at ValidateSignature. break; case DXIL::SemanticKind::IsFrontFace: { - if (!(compInt && compWidth == 32) || SE.GetCols() != 1) { + if (!(CompInt && CompWidth == 32) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } @@ -4211,14 +4283,14 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::SampleIndex: case DXIL::SemanticKind::StencilRef: case DXIL::SemanticKind::ShadingRate: - if ((compKind != CompType::Kind::U32 && compKind != CompType::Kind::U16) || + if ((CompKind != CompType::Kind::U32 && CompKind != CompType::Kind::U16) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "uint"}); } break; case DXIL::SemanticKind::CullPrimitive: { - if (!(compBool && compWidth == 1) || SE.GetCols() != 1) { + if (!(CompBool && CompWidth == 1) || SE.GetCols() != 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "bool"}); } @@ -4226,8 +4298,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, case DXIL::SemanticKind::TessFactor: case DXIL::SemanticKind::InsideTessFactor: // NOTE: the size check is at CheckPatchConstantSemantic. 
- bIsTessfactor = true; - if (!compFloat || compWidth > 32) { + IsTessfactor = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4236,12 +4308,12 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, break; case DXIL::SemanticKind::DomainLocation: case DXIL::SemanticKind::Invalid: - DXASSERT(!bAllowedInSig, "else internal inconsistency between semantic " - "interpretation table and validation code"); + DXASSERT(!AllowedInSig, "else internal inconsistency between semantic " + "interpretation table and validation code"); break; case DXIL::SemanticKind::Barycentrics: - bIsBarycentric = true; - if (!compFloat || compWidth > 32) { + IsBarycentric = true; + if (!CompFloat || CompWidth > 32) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType, {SE.GetSemantic()->GetName(), "float"}); } @@ -4286,32 +4358,32 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } } - if (semanticKind == DXIL::SemanticKind::Target) { - // Verify packed row == semantic index - unsigned row = SE.GetStartRow(); + if (SemanticKind == DXIL::SemanticKind::Target) { + // Verify packed Row == semantic index + unsigned Row = SE.GetStartRow(); for (unsigned i : SE.GetSemanticIndexVec()) { - if (row != i) { + if (Row != i) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetIndexMatchesRow); } - ++row; + ++Row; } - // Verify packed col is 0 + // Verify packed Col is 0 if (SE.GetStartCol() != 0) { ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetCol0); } - // Verify max row used < 8 + // Verify max Row used < 8 if (SE.GetStartRow() + SE.GetRows() > 8) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {"SV_Target", "7"}); } - } else if (bAllowedInSig && semanticKind != DXIL::SemanticKind::Arbitrary) { - if (bIsBarycentric) { + } else if (AllowedInSig && SemanticKind != DXIL::SemanticKind::Arbitrary) { + if (IsBarycentric) { if 
(SE.GetSemanticStartIndex() > 1) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "1"}); } - } else if (!bIsClipCull && SE.GetSemanticStartIndex() > 0) { + } else if (!IsClipCull && SE.GetSemanticStartIndex() > 0) { ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "0"}); } @@ -4319,17 +4391,17 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, // with the exception of tessfactors, which are validated in // CheckPatchConstantSemantic and ClipDistance/CullDistance, which have // other custom constraints. - if (!bIsTessfactor && !bIsClipCull && SE.GetRows() > 1) { + if (!IsTessfactor && !IsClipCull && SE.GetRows() > 1) { ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSystemValueRows); } } if (SE.GetCols() + (SE.IsAllocated() ? SE.GetStartCol() : 0) > 4) { - unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols(); + unsigned Size = (SE.GetRows() - 1) * 4 + SE.GetCols(); ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange, {SE.GetName(), std::to_string(SE.GetStartRow()), std::to_string(SE.GetStartCol()), - std::to_string(size)}); + std::to_string(Size)}); } if (!SE.GetInterpolationMode()->IsValid()) { @@ -4338,8 +4410,8 @@ static void ValidateSignatureElement(DxilSignatureElement &SE, } static void ValidateSignatureOverlap(DxilSignatureElement &E, - unsigned maxScalars, - DxilSignatureAllocator &allocator, + unsigned MaxScalars, + DxilSignatureAllocator &Allocator, ValidationContext &ValCtx) { // Skip entries that are not or should not be allocated. 
Validation occurs in @@ -4357,16 +4429,16 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, break; } - DxilPackElement PE(&E, allocator.UseMinPrecision()); - DxilSignatureAllocator::ConflictType conflict = - allocator.DetectRowConflict(&PE, E.GetStartRow()); - if (conflict == DxilSignatureAllocator::kNoConflict || - conflict == DxilSignatureAllocator::kInsufficientFreeComponents) - conflict = - allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); - switch (conflict) { + DxilPackElement PE(&E, Allocator.UseMinPrecision()); + DxilSignatureAllocator::ConflictType Conflict = + Allocator.DetectRowConflict(&PE, E.GetStartRow()); + if (Conflict == DxilSignatureAllocator::kNoConflict || + Conflict == DxilSignatureAllocator::kInsufficientFreeComponents) + Conflict = + Allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol()); + switch (Conflict) { case DxilSignatureAllocator::kNoConflict: - allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); + Allocator.PlaceElement(&PE, E.GetStartRow(), E.GetStartCol()); break; case DxilSignatureAllocator::kConflictsWithIndexed: ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict, @@ -4428,59 +4500,59 @@ static void ValidateSignatureOverlap(DxilSignatureElement &E, } static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, - EntryStatus &Status, unsigned maxScalars) { - DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = { + EntryStatus &Status, unsigned MaxScalars) { + DxilSignatureAllocator Allocator[DXIL::kNumOutputStreams] = { {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}, {32, ValCtx.DxilMod.GetUseMinPrecision()}}; - unordered_set semanticUsageSet[DXIL::kNumOutputStreams]; - StringMap> semanticIndexMap[DXIL::kNumOutputStreams]; - unordered_set clipcullRowSet[DXIL::kNumOutputStreams]; - unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; + 
unordered_set SemanticUsageSet[DXIL::kNumOutputStreams]; + StringMap> SemanticIndexMap[DXIL::kNumOutputStreams]; + unordered_set ClipcullRowSet[DXIL::kNumOutputStreams]; + unsigned ClipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0}; - bool isOutput = S.IsOutput(); + bool IsOutput = S.IsOutput(); unsigned TargetMask = 0; DXIL::SemanticKind DepthKind = DXIL::SemanticKind::Invalid; - const InterpolationMode *prevBaryInterpMode = nullptr; - unsigned numBarycentrics = 0; + const InterpolationMode *PrevBaryInterpMode = nullptr; + unsigned NumBarycentrics = 0; for (auto &E : S.GetElements()) { - DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind(); + DXIL::SemanticKind SemanticKind = E->GetSemantic()->GetKind(); ValidateSignatureElement(*E, ValCtx); - // Avoid OOB indexing on streamId. - unsigned streamId = E->GetOutputStream(); - if (streamId >= DXIL::kNumOutputStreams || !isOutput || + // Avoid OOB indexing on StreamId. + unsigned StreamId = E->GetOutputStream(); + if (StreamId >= DXIL::kNumOutputStreams || !IsOutput || !ValCtx.DxilMod.GetShaderModel()->IsGS()) { - streamId = 0; + StreamId = 0; } // Semantic index overlap check, keyed by name. 
- std::string nameUpper(E->GetName()); - std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), + std::string NameUpper(E->GetName()); + std::transform(NameUpper.begin(), NameUpper.end(), NameUpper.begin(), ::toupper); - unordered_set &semIdxSet = semanticIndexMap[streamId][nameUpper]; - for (unsigned semIdx : E->GetSemanticIndexVec()) { - if (semIdxSet.count(semIdx) > 0) { + unordered_set &SemIdxSet = SemanticIndexMap[StreamId][NameUpper]; + for (unsigned SemIdx : E->GetSemanticIndexVec()) { + if (SemIdxSet.count(SemIdx) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap, - {E->GetName(), std::to_string(semIdx)}); + {E->GetName(), std::to_string(SemIdx)}); return; } else - semIdxSet.insert(semIdx); + SemIdxSet.insert(SemIdx); } // SV_Target has special rules - if (semanticKind == DXIL::SemanticKind::Target) { + if (SemanticKind == DXIL::SemanticKind::Target) { // Validate target overlap if (E->GetStartRow() + E->GetRows() <= 8) { - unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); - if (TargetMask & mask) { + unsigned Mask = ((1 << E->GetRows()) - 1) << E->GetStartRow(); + if (TargetMask & Mask) { ValCtx.EmitFormatError( ValidationRule::MetaNoSemanticOverlap, {"SV_Target", std::to_string(E->GetStartRow())}); } - TargetMask = TargetMask | mask; + TargetMask = TargetMask | Mask; } if (E->GetRows() > 1) { ValCtx.EmitSignatureError(E.get(), ValidationRule::SmNoPSOutputIdx); @@ -4492,19 +4564,19 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, continue; // validate system value semantic rules - switch (semanticKind) { + switch (SemanticKind) { case DXIL::SemanticKind::Arbitrary: break; case DXIL::SemanticKind::ClipDistance: case DXIL::SemanticKind::CullDistance: // Validate max 8 components across 2 rows (registers) - for (unsigned rowIdx = 0; rowIdx < E->GetRows(); rowIdx++) - clipcullRowSet[streamId].insert(E->GetStartRow() + rowIdx); - if (clipcullRowSet[streamId].size() > 2) { + for 
(unsigned RowIdx = 0; RowIdx < E->GetRows(); RowIdx++) + ClipcullRowSet[StreamId].insert(E->GetStartRow() + RowIdx); + if (ClipcullRowSet[StreamId].size() > 2) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxRows); } - clipcullComponents[streamId] += E->GetCols(); - if (clipcullComponents[streamId] > 8) { + ClipcullComponents[StreamId] += E->GetCols(); + if (ClipcullComponents[StreamId] > 8) { ValCtx.EmitSignatureError(E.get(), ValidationRule::MetaClipCullMaxComponents); } @@ -4516,58 +4588,58 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S, ValCtx.EmitSignatureError(E.get(), ValidationRule::SmPSMultipleDepthSemantic); } - DepthKind = semanticKind; + DepthKind = SemanticKind; break; case DXIL::SemanticKind::Barycentrics: { // There can only be up to two SV_Barycentrics // with differeent perspective interpolation modes. - if (numBarycentrics++ > 1) { + if (NumBarycentrics++ > 1) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); break; } - const InterpolationMode *mode = E->GetInterpolationMode(); - if (prevBaryInterpMode) { - if ((mode->IsAnyNoPerspective() && - prevBaryInterpMode->IsAnyNoPerspective()) || - (!mode->IsAnyNoPerspective() && - !prevBaryInterpMode->IsAnyNoPerspective())) { + const InterpolationMode *Mode = E->GetInterpolationMode(); + if (PrevBaryInterpMode) { + if ((Mode->IsAnyNoPerspective() && + PrevBaryInterpMode->IsAnyNoPerspective()) || + (!Mode->IsAnyNoPerspective() && + !PrevBaryInterpMode->IsAnyNoPerspective())) { ValCtx.EmitSignatureError( E.get(), ValidationRule::MetaBarycentricsTwoPerspectives); } } - prevBaryInterpMode = mode; + PrevBaryInterpMode = Mode; break; } default: - if (semanticUsageSet[streamId].count( - static_cast(semanticKind)) > 0) { + if (SemanticUsageSet[StreamId].count( + static_cast(SemanticKind)) > 0) { ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue, {E->GetSemantic()->GetName()}); } - 
semanticUsageSet[streamId].insert(static_cast(semanticKind)); + SemanticUsageSet[StreamId].insert(static_cast(SemanticKind)); break; } // Packed element overlap check. - ValidateSignatureOverlap(*E.get(), maxScalars, allocator[streamId], ValCtx); + ValidateSignatureOverlap(*E.get(), MaxScalars, Allocator[StreamId], ValCtx); - if (isOutput && semanticKind == DXIL::SemanticKind::Position) { + if (IsOutput && SemanticKind == DXIL::SemanticKind::Position) { Status.hasOutputPosition[E->GetOutputStream()] = true; } } if (Status.hasViewID && S.IsInput() && ValCtx.DxilMod.GetShaderModel()->GetKind() == DXIL::ShaderKind::Pixel) { - // Ensure sufficient space for ViewID: - DxilSignatureAllocator::DummyElement viewID; - viewID.rows = 1; - viewID.cols = 1; - viewID.kind = DXIL::SemanticKind::Arbitrary; - viewID.interpolation = DXIL::InterpolationMode::Constant; - viewID.interpretation = DXIL::SemanticInterpretationKind::SGV; - allocator[0].PackNext(&viewID, 0, 32); - if (!viewID.IsAllocated()) { + // Ensure sufficient space for ViewId: + DxilSignatureAllocator::DummyElement ViewId; + ViewId.rows = 1; + ViewId.cols = 1; + ViewId.kind = DXIL::SemanticKind::Arbitrary; + ViewId.interpolation = DXIL::InterpolationMode::Constant; + ViewId.interpretation = DXIL::SemanticInterpretationKind::SGV; + Allocator[0].PackNext(&ViewId, 0, 32); + if (!ViewId.IsAllocated()) { ValCtx.EmitError(ValidationRule::SmViewIDNeedsSlot); } } @@ -4592,12 +4664,12 @@ static void ValidateConstantInterpModeSignature(ValidationContext &ValCtx, } static void ValidateEntrySignatures(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function &F) { - const DxilFunctionProps &props = entryProps.props; - const DxilEntrySignature &S = entryProps.sig; + const DxilFunctionProps &Props = EntryProps.props; + const DxilEntrySignature &S = EntryProps.sig; - if (props.IsRay()) { + if (Props.IsRay()) { // No signatures allowed if 
(!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || @@ -4607,62 +4679,62 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate payload/attribute/params sizes - unsigned payloadSize = 0; - unsigned attrSize = 0; - auto itPayload = F.arg_begin(); - auto itAttr = itPayload; - if (itAttr != F.arg_end()) - itAttr++; + unsigned PayloadSize = 0; + unsigned AttrSize = 0; + auto ItPayload = F.arg_begin(); + auto ItAttr = ItPayload; + if (ItAttr != F.arg_end()) + ItAttr++; DataLayout DL(F.getParent()); - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: - if (itAttr != F.arg_end()) { - Type *Ty = itAttr->getType(); + if (ItAttr != F.arg_end()) { + Type *Ty = ItAttr->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - attrSize = + AttrSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } LLVM_FALLTHROUGH; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: - if (itPayload != F.arg_end()) { - Type *Ty = itPayload->getType(); + if (ItPayload != F.arg_end()) { + Type *Ty = ItPayload->getType(); if (Ty->isPointerTy()) Ty = Ty->getPointerElementType(); - payloadSize = + PayloadSize = (unsigned)std::min(DL.getTypeAllocSize(Ty), (uint64_t)UINT_MAX); } break; } - if (props.ShaderProps.Ray.payloadSizeInBytes < payloadSize) { + if (Props.ShaderProps.Ray.payloadSizeInBytes < PayloadSize) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmRayShaderPayloadSize, - {F.getName(), props.IsCallable() ? "params" : "payload"}); + {F.getName(), Props.IsCallable() ? 
"params" : "payload"}); } - if (props.ShaderProps.Ray.attributeSizeInBytes < attrSize) { + if (Props.ShaderProps.Ray.attributeSizeInBytes < AttrSize) { ValCtx.EmitFnFormatError(&F, ValidationRule::SmRayShaderPayloadSize, {F.getName(), "attribute"}); } return; } - bool isPS = props.IsPS(); - bool isVS = props.IsVS(); - bool isGS = props.IsGS(); - bool isCS = props.IsCS(); - bool isMS = props.IsMS(); + bool IsPs = Props.IsPS(); + bool IsVs = Props.IsVS(); + bool IsGs = Props.IsGS(); + bool IsCs = Props.IsCS(); + bool IsMs = Props.IsMS(); - if (isPS) { + if (IsPs) { // PS output no interp mode. ValidateNoInterpModeSignature(ValCtx, S.OutputSignature); - } else if (isVS) { + } else if (IsVs) { // VS input no interp mode. ValidateNoInterpModeSignature(ValCtx, S.InputSignature); } - if (isMS) { + if (IsMs) { // primitive output constant interp mode. ValidateConstantInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } else { @@ -4670,38 +4742,38 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, ValidateNoInterpModeSignature(ValCtx, S.PatchConstOrPrimSignature); } - unsigned maxInputScalars = DXIL::kMaxInputTotalScalars; - unsigned maxOutputScalars = 0; - unsigned maxPatchConstantScalars = 0; + unsigned MaxInputScalars = DXIL::kMaxInputTotalScalars; + unsigned MaxOutputScalars = 0; + unsigned MaxPatchConstantScalars = 0; - switch (props.shaderKind) { + switch (Props.shaderKind) { case DXIL::ShaderKind::Compute: break; case DXIL::ShaderKind::Vertex: case DXIL::ShaderKind::Geometry: case DXIL::ShaderKind::Pixel: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Hull: case DXIL::ShaderKind::Domain: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxHSOutputPatchConstantTotalScalars; break; case 
DXIL::ShaderKind::Mesh: - maxOutputScalars = DXIL::kMaxOutputTotalScalars; - maxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; + MaxOutputScalars = DXIL::kMaxOutputTotalScalars; + MaxPatchConstantScalars = DXIL::kMaxOutputTotalScalars; break; case DXIL::ShaderKind::Amplification: default: break; } - ValidateSignature(ValCtx, S.InputSignature, Status, maxInputScalars); - ValidateSignature(ValCtx, S.OutputSignature, Status, maxOutputScalars); + ValidateSignature(ValCtx, S.InputSignature, Status, MaxInputScalars); + ValidateSignature(ValCtx, S.OutputSignature, Status, MaxOutputScalars); ValidateSignature(ValCtx, S.PatchConstOrPrimSignature, Status, - maxPatchConstantScalars); + MaxPatchConstantScalars); - if (isPS) { + if (IsPs) { // Gather execution information. hlsl::PSExecutionInfo PSExec; DxilSignatureElement *PosInterpSE = nullptr; @@ -4743,10 +4815,10 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } // Validate PS output semantic. - const DxilSignature &outputSig = S.OutputSignature; - for (auto &SE : outputSig.GetElements()) { - Semantic::Kind semanticKind = SE->GetSemantic()->GetKind(); - switch (semanticKind) { + const DxilSignature &OutputSig = S.OutputSignature; + for (auto &SE : OutputSig.GetElements()) { + Semantic::Kind SemanticKind = SE->GetSemantic()->GetKind(); + switch (SemanticKind) { case Semantic::Kind::Target: case Semantic::Kind::Coverage: case Semantic::Kind::Depth: @@ -4762,24 +4834,24 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isGS) { - unsigned maxVertexCount = props.ShaderProps.GS.maxVertexCount; - unsigned outputScalarCount = 0; - const DxilSignature &outSig = S.OutputSignature; - for (auto &SE : outSig.GetElements()) { - outputScalarCount += SE->GetRows() * SE->GetCols(); + if (IsGs) { + unsigned MaxVertexCount = Props.ShaderProps.GS.maxVertexCount; + unsigned OutputScalarCount = 0; + const DxilSignature &OutSig = S.OutputSignature; + for (auto &SE : OutSig.GetElements()) { + 
OutputScalarCount += SE->GetRows() * SE->GetCols(); } - unsigned totalOutputScalars = maxVertexCount * outputScalarCount; - if (totalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { + unsigned TotalOutputScalars = MaxVertexCount * OutputScalarCount; + if (TotalOutputScalars > DXIL::kMaxGSOutputTotalScalars) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmGSTotalOutputVertexDataRange, - {std::to_string(maxVertexCount), std::to_string(outputScalarCount), - std::to_string(totalOutputScalars), + {std::to_string(MaxVertexCount), std::to_string(OutputScalarCount), + std::to_string(TotalOutputScalars), std::to_string(DXIL::kMaxGSOutputTotalScalars)}); } } - if (isCS) { + if (IsCs) { if (!S.InputSignature.GetElements().empty() || !S.OutputSignature.GetElements().empty() || !S.PatchConstOrPrimSignature.GetElements().empty()) { @@ -4787,7 +4859,7 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, } } - if (isMS) { + if (IsMs) { unsigned VertexSignatureRows = S.OutputSignature.GetRowCount(); if (VertexSignatureRows > DXIL::kMaxMSVSigRows) { ValCtx.EmitFnFormatError( @@ -4809,31 +4881,31 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx, const unsigned kScalarSizeForMSAttributes = 4; #define ALIGN32(n) (((n) + 31) & ~31) - unsigned maxAlign32VertexCount = - ALIGN32(props.ShaderProps.MS.maxVertexCount); - unsigned maxAlign32PrimitiveCount = - ALIGN32(props.ShaderProps.MS.maxPrimitiveCount); - unsigned totalOutputScalars = 0; + unsigned MaxAlign32VertexCount = + ALIGN32(Props.ShaderProps.MS.maxVertexCount); + unsigned MaxAlign32PrimitiveCount = + ALIGN32(Props.ShaderProps.MS.maxPrimitiveCount); + unsigned TotalOutputScalars = 0; for (auto &SE : S.OutputSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * SE->GetCols() * maxAlign32VertexCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32VertexCount; } for (auto &SE : S.PatchConstOrPrimSignature.GetElements()) { - totalOutputScalars += - SE->GetRows() * 
SE->GetCols() * maxAlign32PrimitiveCount; + TotalOutputScalars += + SE->GetRows() * SE->GetCols() * MaxAlign32PrimitiveCount; } - if (totalOutputScalars * kScalarSizeForMSAttributes > + if (TotalOutputScalars * kScalarSizeForMSAttributes > DXIL::kMaxMSOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderOutputSize, {F.getName(), std::to_string(DXIL::kMaxMSOutputTotalBytes)}); } - unsigned totalInputOutputBytes = - totalOutputScalars * kScalarSizeForMSAttributes + - props.ShaderProps.MS.payloadSizeInBytes; - if (totalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { + unsigned TotalInputOutputBytes = + TotalOutputScalars * kScalarSizeForMSAttributes + + Props.ShaderProps.MS.payloadSizeInBytes; + if (TotalInputOutputBytes > DXIL::kMaxMSInputOutputTotalBytes) { ValCtx.EmitFnFormatError( &F, ValidationRule::SmMeshShaderInOutSize, {F.getName(), std::to_string(DXIL::kMaxMSInputOutputTotalBytes)}); @@ -4846,9 +4918,9 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntrySignatures(ValCtx, entryProps, Status, F); + ValidateEntrySignatures(ValCtx, EntryProps, Status, F); } } } else { @@ -4859,8 +4931,8 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntrySignatures(ValCtx, entryProps, Status, *Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntrySignatures(ValCtx, EntryProps, Status, *Entry); } } @@ -4869,14 +4941,14 @@ static void ValidateEntrySignatures(ValidationContext &ValCtx) { struct CompatibilityChecker { ValidationContext &ValCtx; Function *EntryFn; - const 
DxilFunctionProps &props; - DXIL::ShaderKind shaderKind; + const DxilFunctionProps &Props; + DXIL::ShaderKind ShaderKind; // These masks identify the potential conflict flags based on the entry // function's shader kind and properties when either UsesDerivatives or // RequiresGroup flags are set in ShaderCompatInfo. - uint32_t maskForDeriv = 0; - uint32_t maskForGroup = 0; + uint32_t MaskForDeriv = 0; + uint32_t MaskForGroup = 0; enum class ConflictKind : uint32_t { Stage, @@ -4898,77 +4970,77 @@ struct CompatibilityChecker { CompatibilityChecker(ValidationContext &ValCtx, Function *EntryFn) : ValCtx(ValCtx), EntryFn(EntryFn), - props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), - shaderKind(props.shaderKind) { + Props(ValCtx.DxilMod.GetDxilEntryProps(EntryFn).props), + ShaderKind(Props.shaderKind) { // Precompute potential incompatibilities based on shader stage, shader kind // and entry attributes. These will turn into full conflicts if the entry // point's shader flags indicate that they use relevant features. if (!ValCtx.DxilMod.GetShaderModel()->IsSM66Plus() && - (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute)) { - maskForDeriv |= + (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute)) { + MaskForDeriv |= static_cast(ConflictFlags::DerivInComputeShaderModel); - } else if (shaderKind == DXIL::ShaderKind::Node) { + } else if (ShaderKind == DXIL::ShaderKind::Node) { // Only broadcasting launch supports derivatives. - if (props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) - maskForDeriv |= static_cast(ConflictFlags::DerivLaunch); + if (Props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) + MaskForDeriv |= static_cast(ConflictFlags::DerivLaunch); // Thread launch node has no group. 
- if (props.Node.LaunchType == DXIL::NodeLaunchType::Thread) - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + if (Props.Node.LaunchType == DXIL::NodeLaunchType::Thread) + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } - if (shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification || - shaderKind == DXIL::ShaderKind::Compute || - shaderKind == DXIL::ShaderKind::Node) { + if (ShaderKind == DXIL::ShaderKind::Mesh || + ShaderKind == DXIL::ShaderKind::Amplification || + ShaderKind == DXIL::ShaderKind::Compute || + ShaderKind == DXIL::ShaderKind::Node) { // All compute-like stages // Thread dimensions must be either 1D and X is multiple of 4, or 2D // and X and Y must be multiples of 2. - if (props.numThreads[1] == 1 && props.numThreads[2] == 1) { - if ((props.numThreads[0] & 0x3) != 0) - maskForDeriv |= + if (Props.numThreads[1] == 1 && Props.numThreads[2] == 1) { + if ((Props.numThreads[0] & 0x3) != 0) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); - } else if ((props.numThreads[0] & 0x1) || (props.numThreads[1] & 0x1)) - maskForDeriv |= + } else if ((Props.numThreads[0] & 0x1) || (Props.numThreads[1] & 0x1)) + MaskForDeriv |= static_cast(ConflictFlags::DerivThreadGroupDim); } else { // other stages have no group - maskForGroup |= static_cast(ConflictFlags::RequiresGroup); + MaskForGroup |= static_cast(ConflictFlags::RequiresGroup); } } uint32_t - IdentifyConflict(const DxilModule::ShaderCompatInfo &compatInfo) const { - uint32_t conflictMask = 0; + IdentifyConflict(const DxilModule::ShaderCompatInfo &CompatInfo) const { + uint32_t ConflictMask = 0; // Compatibility check said this shader kind is not compatible. - if (0 == ((1 << (uint32_t)shaderKind) & compatInfo.mask)) - conflictMask |= (uint32_t)ConflictFlags::Stage; + if (0 == ((1 << (uint32_t)ShaderKind) & CompatInfo.mask)) + ConflictMask |= (uint32_t)ConflictFlags::Stage; // Compatibility check said this shader model is not compatible. 
if (DXIL::CompareVersions(ValCtx.DxilMod.GetShaderModel()->GetMajor(), ValCtx.DxilMod.GetShaderModel()->GetMinor(), - compatInfo.minMajor, compatInfo.minMinor) < 0) - conflictMask |= (uint32_t)ConflictFlags::ShaderModel; + CompatInfo.minMajor, CompatInfo.minMinor) < 0) + ConflictMask |= (uint32_t)ConflictFlags::ShaderModel; - if (compatInfo.shaderFlags.GetUsesDerivatives()) - conflictMask |= maskForDeriv; + if (CompatInfo.shaderFlags.GetUsesDerivatives()) + ConflictMask |= MaskForDeriv; - if (compatInfo.shaderFlags.GetRequiresGroup()) - conflictMask |= maskForGroup; + if (CompatInfo.shaderFlags.GetRequiresGroup()) + ConflictMask |= MaskForGroup; - return conflictMask; + return ConflictMask; } - void Diagnose(Function *F, uint32_t conflictMask, ConflictKind conflict, - ValidationRule rule, ArrayRef args = {}) { - if (conflictMask & (1 << (unsigned)conflict)) - ValCtx.EmitFnFormatError(F, rule, args); + void Diagnose(Function *F, uint32_t ConflictMask, ConflictKind Conflict, + ValidationRule Rule, ArrayRef Args = {}) { + if (ConflictMask & (1 << (unsigned)Conflict)) + ValCtx.EmitFnFormatError(F, Rule, Args); } - void DiagnoseConflicts(Function *F, uint32_t conflictMask) { + void DiagnoseConflicts(Function *F, uint32_t ConflictMask) { // Emit a diagnostic indicating that either the entry function or a function // called by the entry function contains a disallowed operation. if (F == EntryFn) @@ -4977,22 +5049,22 @@ struct CompatibilityChecker { ValCtx.EmitFnError(EntryFn, ValidationRule::SmIncompatibleCallInEntry); // Emit diagnostics for each conflict found in this function. 
- Diagnose(F, conflictMask, ConflictKind::Stage, + Diagnose(F, ConflictMask, ConflictKind::Stage, ValidationRule::SmIncompatibleStage, - {ShaderModel::GetKindName(props.shaderKind)}); - Diagnose(F, conflictMask, ConflictKind::ShaderModel, + {ShaderModel::GetKindName(Props.shaderKind)}); + Diagnose(F, ConflictMask, ConflictKind::ShaderModel, ValidationRule::SmIncompatibleShaderModel); - Diagnose(F, conflictMask, ConflictKind::DerivLaunch, + Diagnose(F, ConflictMask, ConflictKind::DerivLaunch, ValidationRule::SmIncompatibleDerivLaunch, - {GetLaunchTypeStr(props.Node.LaunchType)}); - Diagnose(F, conflictMask, ConflictKind::DerivThreadGroupDim, + {GetLaunchTypeStr(Props.Node.LaunchType)}); + Diagnose(F, ConflictMask, ConflictKind::DerivThreadGroupDim, ValidationRule::SmIncompatibleThreadGroupDim, - {std::to_string(props.numThreads[0]), - std::to_string(props.numThreads[1]), - std::to_string(props.numThreads[2])}); - Diagnose(F, conflictMask, ConflictKind::DerivInComputeShaderModel, + {std::to_string(Props.numThreads[0]), + std::to_string(Props.numThreads[1]), + std::to_string(Props.numThreads[2])}); + Diagnose(F, ConflictMask, ConflictKind::DerivInComputeShaderModel, ValidationRule::SmIncompatibleDerivInComputeShaderModel); - Diagnose(F, conflictMask, ConflictKind::RequiresGroup, + Diagnose(F, ConflictMask, ConflictKind::RequiresGroup, ValidationRule::SmIncompatibleRequiresGroup); } @@ -5001,59 +5073,59 @@ struct CompatibilityChecker { // functions called by that function introduced the conflict. // In those cases, the called functions themselves will emit the diagnostic. // Return conflict mask for this function. - uint32_t Visit(Function *F, uint32_t &remainingMask, - llvm::SmallPtrSet &visited, CallGraph &CG) { + uint32_t Visit(Function *F, uint32_t &RemainingMask, + llvm::SmallPtrSet &Visited, CallGraph &CG) { // Recursive check looks for where a conflict is found and not present // in functions called by the current function. 
// - When a source is found, emit diagnostics and clear the conflict // flags introduced by this function from the working mask so we don't // report this conflict again. - // - When the remainingMask is 0, we are done. + // - When the RemainingMask is 0, we are done. - if (remainingMask == 0) + if (RemainingMask == 0) return 0; // Nothing left to search for. - if (!visited.insert(F).second) + if (!Visited.insert(F).second) return 0; // Already visited. - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = ValCtx.DxilMod.GetCompatInfoForFunction(F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) return 0; - uint32_t maskForThisFunction = IdentifyConflict(*compatInfo); + uint32_t MaskForThisFunction = IdentifyConflict(*CompatInfo); - uint32_t maskForCalls = 0; + uint32_t MaskForCalls = 0; if (CallGraphNode *CGNode = CG[F]) { for (auto &Call : *CGNode) { Function *called = Call.second->getFunction(); if (called->isDeclaration()) continue; - maskForCalls |= Visit(called, remainingMask, visited, CG); - if (remainingMask == 0) + MaskForCalls |= Visit(called, RemainingMask, Visited, CG); + if (RemainingMask == 0) return 0; // Nothing left to search for. } } // Mask of incompatibilities introduced by this function. - uint32_t conflictsIntroduced = - remainingMask & maskForThisFunction & ~maskForCalls; - if (conflictsIntroduced) { + uint32_t ConflictsIntroduced = + RemainingMask & MaskForThisFunction & ~MaskForCalls; + if (ConflictsIntroduced) { // This function introduces at least one conflict. - DiagnoseConflicts(F, conflictsIntroduced); + DiagnoseConflicts(F, ConflictsIntroduced); // Mask off diagnosed incompatibilities. 
- remainingMask &= ~conflictsIntroduced; + RemainingMask &= ~ConflictsIntroduced; } - return maskForThisFunction; + return MaskForThisFunction; } - void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &compatInfo) { - uint32_t conflictMask = IdentifyConflict(compatInfo); - if (conflictMask == 0) + void FindIncompatibleCall(const DxilModule::ShaderCompatInfo &CompatInfo) { + uint32_t ConflictMask = IdentifyConflict(CompatInfo); + if (ConflictMask == 0) return; CallGraph &CG = ValCtx.GetCallGraph(); - llvm::SmallPtrSet visited; - Visit(EntryFn, conflictMask, visited, CG); + llvm::SmallPtrSet Visited; + Visit(EntryFn, ConflictMask, Visited, CG); } }; @@ -5062,14 +5134,14 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { DxilModule &DM = ValCtx.DxilMod; for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - const DxilModule::ShaderCompatInfo *compatInfo = + const DxilModule::ShaderCompatInfo *CompatInfo = DM.GetCompatInfoForFunction(&F); - DXASSERT(compatInfo, "otherwise, compat info not computed in module"); - if (!compatInfo) + DXASSERT(CompatInfo, "otherwise, compat info not computed in module"); + if (!CompatInfo) continue; CompatibilityChecker checker(ValCtx, &F); - checker.FindIncompatibleCall(*compatInfo); + checker.FindIncompatibleCall(*CompatInfo); } } } @@ -5077,101 +5149,101 @@ static void ValidateEntryCompatibility(ValidationContext &ValCtx) { static void CheckPatchConstantSemantic(ValidationContext &ValCtx, const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = EntryProps.props; - bool isHS = props.IsHS(); + const DxilFunctionProps &Props = EntryProps.props; + bool IsHs = Props.IsHS(); - DXIL::TessellatorDomain domain = - isHS ? props.ShaderProps.HS.domain : props.ShaderProps.DS.domain; + DXIL::TessellatorDomain Domain = + IsHs ? 
Props.ShaderProps.HS.domain : Props.ShaderProps.DS.domain; - const DxilSignature &patchConstantSig = + const DxilSignature &PatchConstantSig = EntryProps.sig.PatchConstOrPrimSignature; - const unsigned kQuadEdgeSize = 4; - const unsigned kQuadInsideSize = 2; - const unsigned kQuadDomainLocSize = 2; + const unsigned KQuadEdgeSize = 4; + const unsigned KQuadInsideSize = 2; + const unsigned KQuadDomainLocSize = 2; - const unsigned kTriEdgeSize = 3; - const unsigned kTriInsideSize = 1; - const unsigned kTriDomainLocSize = 3; + const unsigned KTriEdgeSize = 3; + const unsigned KTriInsideSize = 1; + const unsigned KTriDomainLocSize = 3; - const unsigned kIsolineEdgeSize = 2; - const unsigned kIsolineInsideSize = 0; - const unsigned kIsolineDomainLocSize = 3; + const unsigned KIsolineEdgeSize = 2; + const unsigned KIsolineInsideSize = 0; + const unsigned KIsolineDomainLocSize = 3; - const char *domainName = ""; + const char *DomainName = ""; DXIL::SemanticKind kEdgeSemantic = DXIL::SemanticKind::TessFactor; - unsigned edgeSize = 0; + unsigned EdgeSize = 0; DXIL::SemanticKind kInsideSemantic = DXIL::SemanticKind::InsideTessFactor; - unsigned insideSize = 0; + unsigned InsideSize = 0; Status.domainLocSize = 0; - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - domainName = "IsoLine"; - edgeSize = kIsolineEdgeSize; - insideSize = kIsolineInsideSize; - Status.domainLocSize = kIsolineDomainLocSize; + DomainName = "IsoLine"; + EdgeSize = KIsolineEdgeSize; + InsideSize = KIsolineInsideSize; + Status.domainLocSize = KIsolineDomainLocSize; break; case DXIL::TessellatorDomain::Tri: - domainName = "Tri"; - edgeSize = kTriEdgeSize; - insideSize = kTriInsideSize; - Status.domainLocSize = kTriDomainLocSize; + DomainName = "Tri"; + EdgeSize = KTriEdgeSize; + InsideSize = KTriInsideSize; + Status.domainLocSize = KTriDomainLocSize; break; case DXIL::TessellatorDomain::Quad: - domainName = "Quad"; - edgeSize = kQuadEdgeSize; - insideSize = kQuadInsideSize; - 
Status.domainLocSize = kQuadDomainLocSize; + DomainName = "Quad"; + EdgeSize = KQuadEdgeSize; + InsideSize = KQuadInsideSize; + Status.domainLocSize = KQuadDomainLocSize; break; default: // Don't bother with other tests if domain is invalid return; } - bool bFoundEdgeSemantic = false; - bool bFoundInsideSemantic = false; - for (auto &SE : patchConstantSig.GetElements()) { - Semantic::Kind kind = SE->GetSemantic()->GetKind(); - if (kind == kEdgeSemantic) { - bFoundEdgeSemantic = true; - if (SE->GetRows() != edgeSize || SE->GetCols() > 1) { + bool FoundEdgeSemantic = false; + bool FoundInsideSemantic = false; + for (auto &SE : PatchConstantSig.GetElements()) { + Semantic::Kind Kind = SE->GetSemantic()->GetKind(); + if (Kind == kEdgeSemantic) { + FoundEdgeSemantic = true; + if (SE->GetRows() != EdgeSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), - std::to_string(SE->GetCols()), domainName, - std::to_string(edgeSize)}); + std::to_string(SE->GetCols()), DomainName, + std::to_string(EdgeSize)}); } - } else if (kind == kInsideSemantic) { - bFoundInsideSemantic = true; - if (SE->GetRows() != insideSize || SE->GetCols() > 1) { + } else if (Kind == kInsideSemantic) { + FoundInsideSemantic = true; + if (SE->GetRows() != InsideSize || SE->GetCols() > 1) { ValCtx.EmitFnFormatError( F, ValidationRule::SmInsideTessFactorSizeMatchDomain, {std::to_string(SE->GetRows()), std::to_string(SE->GetCols()), - domainName, std::to_string(insideSize)}); + DomainName, std::to_string(InsideSize)}); } } } - if (isHS) { - if (!bFoundEdgeSemantic) { + if (IsHs) { + if (!FoundEdgeSemantic) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } - if (!bFoundInsideSemantic && domain != DXIL::TessellatorDomain::IsoLine) { + if (!FoundInsideSemantic && Domain != DXIL::TessellatorDomain::IsoLine) { ValCtx.EmitFnError(F, ValidationRule::SmTessFactorForDomain); } } } static void 
ValidatePassThruHS(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { + const DxilEntryProps &EntryProps, Function *F) { // Check pass thru HS. if (F->isDeclaration()) { - const auto &props = entryProps.props; - if (props.IsHS()) { - const auto &HS = props.ShaderProps.HS; + const auto &Props = EntryProps.props; + if (Props.IsHS()) { + const auto &HS = Props.ShaderProps.HS; if (HS.inputControlPoints < HS.outputControlPoints) { ValCtx.EmitFnError( F, ValidationRule::SmHullPassThruControlPointCountMatch); @@ -5179,12 +5251,12 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // Check declared control point outputs storage amounts are ok to pass // through (less output storage than input for control points). - const DxilSignature &outSig = entryProps.sig.OutputSignature; - unsigned totalOutputCPScalars = 0; - for (auto &SE : outSig.GetElements()) { - totalOutputCPScalars += SE->GetRows() * SE->GetCols(); + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + unsigned TotalOutputCpScalars = 0; + for (auto &SE : OutSig.GetElements()) { + TotalOutputCpScalars += SE->GetRows() * SE->GetCols(); } - if (totalOutputCPScalars * HS.outputControlPoints > + if (TotalOutputCpScalars * HS.outputControlPoints > DXIL::kMaxHSOutputControlPointsTotalScalars) { ValCtx.EmitFnError(F, ValidationRule::SmOutputControlPointsTotalScalars); @@ -5199,35 +5271,35 @@ static void ValidatePassThruHS(ValidationContext &ValCtx, // validate wave size (currently allowed only on CS and node shaders but might // be supported on other shader types in the future) static void ValidateWaveSize(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, Function *F) { - const DxilFunctionProps &props = entryProps.props; - const hlsl::DxilWaveSize &waveSize = props.WaveSize; + const DxilEntryProps &EntryProps, Function *F) { + const DxilFunctionProps &Props = EntryProps.props; + const hlsl::DxilWaveSize &WaveSize = Props.WaveSize; - switch 
(waveSize.Validate()) { + switch (WaveSize.Validate()) { case hlsl::DxilWaveSize::ValidationResult::Success: break; case hlsl::DxilWaveSize::ValidationResult::InvalidMin: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Min", std::to_string(waveSize.Min), + {"Min", std::to_string(WaveSize.Min), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidMax: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Max", std::to_string(waveSize.Max), + {"Max", std::to_string(WaveSize.Max), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::InvalidPreferred: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizeValue, - {"Preferred", std::to_string(waveSize.Preferred), + {"Preferred", std::to_string(WaveSize.Preferred), std::to_string(DXIL::kMinWaveSize), std::to_string(DXIL::kMaxWaveSize)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxOrPreferredWhenUndefined: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeAllZeroWhenUndefined, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxEqualsMin: // This case is allowed because users may disable the ErrorDefault warning. 
@@ -5235,227 +5307,227 @@ static void ValidateWaveSize(ValidationContext &ValCtx, case hlsl::DxilWaveSize::ValidationResult::PreferredWhenNoRange: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxAndPreferredZeroWhenNoRange, - {std::to_string(waveSize.Max), std::to_string(waveSize.Preferred)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Preferred)}); break; case hlsl::DxilWaveSize::ValidationResult::MaxLessThanMin: ValCtx.EmitFnFormatError( F, ValidationRule::SmWaveSizeMaxGreaterThanMin, - {std::to_string(waveSize.Max), std::to_string(waveSize.Min)}); + {std::to_string(WaveSize.Max), std::to_string(WaveSize.Min)}); break; case hlsl::DxilWaveSize::ValidationResult::PreferredOutOfRange: ValCtx.EmitFnFormatError(F, ValidationRule::SmWaveSizePreferredInRange, - {std::to_string(waveSize.Preferred), - std::to_string(waveSize.Min), - std::to_string(waveSize.Max)}); + {std::to_string(WaveSize.Preferred), + std::to_string(WaveSize.Min), + std::to_string(WaveSize.Max)}); break; } // Check shader model and kind. 
- if (waveSize.IsDefined()) { - if (!props.IsCS() && !props.IsNode()) { + if (WaveSize.IsDefined()) { + if (!Props.IsCS() && !Props.IsNode()) { ValCtx.EmitFnError(F, ValidationRule::SmWaveSizeOnComputeOrNode); } } } static void ValidateEntryProps(ValidationContext &ValCtx, - const DxilEntryProps &entryProps, + const DxilEntryProps &EntryProps, EntryStatus &Status, Function *F) { - const DxilFunctionProps &props = entryProps.props; - DXIL::ShaderKind ShaderType = props.shaderKind; + const DxilFunctionProps &Props = EntryProps.props; + DXIL::ShaderKind ShaderType = Props.shaderKind; - ValidateWaveSize(ValCtx, entryProps, F); + ValidateWaveSize(ValCtx, EntryProps, F); - if (ShaderType == DXIL::ShaderKind::Compute || props.IsNode()) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + if (ShaderType == DXIL::ShaderKind::Compute || Props.IsNode()) { + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinCSThreadGroupX) || (x > DXIL::kMaxCSThreadGroupX)) { + if ((X < DXIL::kMinCSThreadGroupX) || (X > DXIL::kMaxCSThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinCSThreadGroupX), std::to_string(DXIL::kMaxCSThreadGroupX)}); } - if ((y < DXIL::kMinCSThreadGroupY) || (y > DXIL::kMaxCSThreadGroupY)) { + if ((Y < DXIL::kMinCSThreadGroupY) || (Y > DXIL::kMaxCSThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinCSThreadGroupY), std::to_string(DXIL::kMaxCSThreadGroupY)}); } - if ((z < DXIL::kMinCSThreadGroupZ) || (z > DXIL::kMaxCSThreadGroupZ)) { + if ((Z < DXIL::kMinCSThreadGroupZ) || (Z > DXIL::kMaxCSThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinCSThreadGroupZ), std::to_string(DXIL::kMaxCSThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxCSThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxCSThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxCSThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. } else if (ShaderType == DXIL::ShaderKind::Mesh) { - const auto &MS = props.ShaderProps.MS; - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + const auto &MS = Props.ShaderProps.MS; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, 
ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. - unsigned maxVertexCount = MS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxMSOutputVertexCount) { + unsigned MaxVertexCount = MS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxMSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMeshShaderMaxVertexCount, {std::to_string(DXIL::kMaxMSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned maxPrimitiveCount = MS.maxPrimitiveCount; - if (maxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { + unsigned MaxPrimitiveCount = MS.maxPrimitiveCount; + if (MaxPrimitiveCount > DXIL::kMaxMSOutputPrimitiveCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmMeshShaderMaxPrimitiveCount, {std::to_string(DXIL::kMaxMSOutputPrimitiveCount), - std::to_string(maxPrimitiveCount)}); + std::to_string(MaxPrimitiveCount)}); } } else if (ShaderType == DXIL::ShaderKind::Amplification) { - unsigned x = props.numThreads[0]; - unsigned y = props.numThreads[1]; - unsigned z = props.numThreads[2]; + unsigned X = Props.numThreads[0]; + unsigned Y = Props.numThreads[1]; + unsigned Z = Props.numThreads[2]; - unsigned threadsInGroup = x * y * z; + unsigned ThreadsInGroup = X * Y * Z; - if ((x < DXIL::kMinMSASThreadGroupX) || (x > DXIL::kMaxMSASThreadGroupX)) { + if ((X < DXIL::kMinMSASThreadGroupX) || (X > DXIL::kMaxMSASThreadGroupX)) { 
ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"X", std::to_string(x), + {"X", std::to_string(X), std::to_string(DXIL::kMinMSASThreadGroupX), std::to_string(DXIL::kMaxMSASThreadGroupX)}); } - if ((y < DXIL::kMinMSASThreadGroupY) || (y > DXIL::kMaxMSASThreadGroupY)) { + if ((Y < DXIL::kMinMSASThreadGroupY) || (Y > DXIL::kMaxMSASThreadGroupY)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Y", std::to_string(y), + {"Y", std::to_string(Y), std::to_string(DXIL::kMinMSASThreadGroupY), std::to_string(DXIL::kMaxMSASThreadGroupY)}); } - if ((z < DXIL::kMinMSASThreadGroupZ) || (z > DXIL::kMaxMSASThreadGroupZ)) { + if ((Z < DXIL::kMinMSASThreadGroupZ) || (Z > DXIL::kMaxMSASThreadGroupZ)) { ValCtx.EmitFnFormatError(F, ValidationRule::SmThreadGroupChannelRange, - {"Z", std::to_string(z), + {"Z", std::to_string(Z), std::to_string(DXIL::kMinMSASThreadGroupZ), std::to_string(DXIL::kMaxMSASThreadGroupZ)}); } - if (threadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { + if (ThreadsInGroup > DXIL::kMaxMSASThreadsPerGroup) { ValCtx.EmitFnFormatError(F, ValidationRule::SmMaxTheadGroup, - {std::to_string(threadsInGroup), + {std::to_string(ThreadsInGroup), std::to_string(DXIL::kMaxMSASThreadsPerGroup)}); } - // type of threadID, thread group ID take care by DXIL operation overload + // type of ThreadID, thread group ID take care by DXIL operation overload // check. 
} else if (ShaderType == DXIL::ShaderKind::Domain) { - const auto &DS = props.ShaderProps.DS; - DXIL::TessellatorDomain domain = DS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = DS.inputControlPoints; + const auto &DS = Props.ShaderProps.DS; + DXIL::TessellatorDomain Domain = DS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = DS.inputControlPoints; - if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmDSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Hull) { - const auto &HS = props.ShaderProps.HS; - DXIL::TessellatorDomain domain = HS.domain; - if (domain >= DXIL::TessellatorDomain::LastEntry) - domain = DXIL::TessellatorDomain::Undefined; - unsigned inputControlPointCount = HS.inputControlPoints; - if (inputControlPointCount == 0) { - const DxilSignature &inputSig = entryProps.sig.InputSignature; - if (!inputSig.GetElements().empty()) { + const auto &HS = Props.ShaderProps.HS; + DXIL::TessellatorDomain Domain = HS.domain; + if (Domain >= DXIL::TessellatorDomain::LastEntry) + Domain = DXIL::TessellatorDomain::Undefined; + unsigned InputControlPointCount = HS.inputControlPoints; + if (InputControlPointCount == 0) { + const DxilSignature &InputSig = EntryProps.sig.InputSignature; + if 
(!InputSig.GetElements().empty()) { ValCtx.EmitFnError(F, ValidationRule::SmZeroHSInputControlPointWithInput); } - } else if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + } else if (InputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmHSInputControlPointCountRange, {std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(inputControlPointCount)}); + std::to_string(InputControlPointCount)}); } - unsigned outputControlPointCount = HS.outputControlPoints; - if (outputControlPointCount < DXIL::kMinIAPatchControlPointCount || - outputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { + unsigned OutputControlPointCount = HS.outputControlPoints; + if (OutputControlPointCount < DXIL::kMinIAPatchControlPointCount || + OutputControlPointCount > DXIL::kMaxIAPatchControlPointCount) { ValCtx.EmitFnFormatError( F, ValidationRule::SmOutputControlPointCountRange, {std::to_string(DXIL::kMinIAPatchControlPointCount), std::to_string(DXIL::kMaxIAPatchControlPointCount), - std::to_string(outputControlPointCount)}); + std::to_string(OutputControlPointCount)}); } - if (domain == DXIL::TessellatorDomain::Undefined) { + if (Domain == DXIL::TessellatorDomain::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmValidDomain); } - DXIL::TessellatorPartitioning partition = HS.partition; - if (partition == DXIL::TessellatorPartitioning::Undefined) { + DXIL::TessellatorPartitioning Partition = HS.partition; + if (Partition == DXIL::TessellatorPartitioning::Undefined) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorPartition); } - DXIL::TessellatorOutputPrimitive tessOutputPrimitive = HS.outputPrimitive; - if (tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::Undefined || - tessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { + DXIL::TessellatorOutputPrimitive TessOutputPrimitive = HS.outputPrimitive; + if (TessOutputPrimitive == 
DXIL::TessellatorOutputPrimitive::Undefined || + TessOutputPrimitive == DXIL::TessellatorOutputPrimitive::LastEntry) { ValCtx.EmitFnError(F, ValidationRule::MetaTessellatorOutputPrimitive); } - float maxTessFactor = HS.maxTessFactor; - if (maxTessFactor < DXIL::kHSMaxTessFactorLowerBound || - maxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { + float MaxTessFactor = HS.maxTessFactor; + if (MaxTessFactor < DXIL::kHSMaxTessFactorLowerBound || + MaxTessFactor > DXIL::kHSMaxTessFactorUpperBound) { ValCtx.EmitFnFormatError( F, ValidationRule::MetaMaxTessFactor, {std::to_string(DXIL::kHSMaxTessFactorLowerBound), std::to_string(DXIL::kHSMaxTessFactorUpperBound), - std::to_string(maxTessFactor)}); + std::to_string(MaxTessFactor)}); } // Domain and OutPrimivtive match. - switch (domain) { + switch (Domain) { case DXIL::TessellatorDomain::IsoLine: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::TriangleCW: case DXIL::TessellatorOutputPrimitive::TriangleCCW: ValCtx.EmitFnError(F, ValidationRule::SmIsoLineOutputPrimitiveMismatch); @@ -5465,7 +5537,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Tri: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5474,7 +5546,7 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; case DXIL::TessellatorDomain::Quad: - switch (tessOutputPrimitive) { + switch (TessOutputPrimitive) { case DXIL::TessellatorOutputPrimitive::Line: ValCtx.EmitFnError(F, ValidationRule::SmTriOutputPrimitiveMismatch); break; @@ -5487,39 +5559,39 @@ static void ValidateEntryProps(ValidationContext &ValCtx, break; } - CheckPatchConstantSemantic(ValCtx, entryProps, Status, F); + CheckPatchConstantSemantic(ValCtx, EntryProps, Status, F); } else if (ShaderType == DXIL::ShaderKind::Geometry) { - const auto 
&GS = props.ShaderProps.GS; - unsigned maxVertexCount = GS.maxVertexCount; - if (maxVertexCount > DXIL::kMaxGSOutputVertexCount) { + const auto &GS = Props.ShaderProps.GS; + unsigned MaxVertexCount = GS.maxVertexCount; + if (MaxVertexCount > DXIL::kMaxGSOutputVertexCount) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSOutputVertexCountRange, {std::to_string(DXIL::kMaxGSOutputVertexCount), - std::to_string(maxVertexCount)}); + std::to_string(MaxVertexCount)}); } - unsigned instanceCount = GS.instanceCount; - if (instanceCount > DXIL::kMaxGSInstanceCount || instanceCount < 1) { + unsigned InstanceCount = GS.instanceCount; + if (InstanceCount > DXIL::kMaxGSInstanceCount || InstanceCount < 1) { ValCtx.EmitFnFormatError(F, ValidationRule::SmGSInstanceCountRange, {std::to_string(DXIL::kMaxGSInstanceCount), - std::to_string(instanceCount)}); + std::to_string(InstanceCount)}); } - DXIL::PrimitiveTopology topo = DXIL::PrimitiveTopology::Undefined; - bool bTopoMismatch = false; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + DXIL::PrimitiveTopology Topo = DXIL::PrimitiveTopology::Undefined; + bool TopoMismatch = false; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - if (topo == DXIL::PrimitiveTopology::Undefined) - topo = GS.streamPrimitiveTopologies[i]; - else if (topo != GS.streamPrimitiveTopologies[i]) { - bTopoMismatch = true; + if (Topo == DXIL::PrimitiveTopology::Undefined) + Topo = GS.streamPrimitiveTopologies[I]; + else if (Topo != GS.streamPrimitiveTopologies[I]) { + TopoMismatch = true; break; } } } - if (bTopoMismatch) - topo = DXIL::PrimitiveTopology::Undefined; - switch (topo) { + if (TopoMismatch) + Topo = DXIL::PrimitiveTopology::Undefined; + switch (Topo) { case DXIL::PrimitiveTopology::PointList: case DXIL::PrimitiveTopology::LineStrip: case DXIL::PrimitiveTopology::TriangleStrip: @@ 
-5529,9 +5601,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } break; } - DXIL::InputPrimitive inputPrimitive = GS.inputPrimitive; - unsigned VertexCount = GetNumVertices(inputPrimitive); - if (VertexCount == 0 && inputPrimitive != DXIL::InputPrimitive::Undefined) { + DXIL::InputPrimitive InputPrimitive = GS.inputPrimitive; + unsigned VertexCount = GetNumVertices(InputPrimitive); + if (VertexCount == 0 && InputPrimitive != DXIL::InputPrimitive::Undefined) { ValCtx.EmitFnError(F, ValidationRule::SmGSValidInputPrimitive); } } @@ -5542,10 +5614,10 @@ static void ValidateShaderState(ValidationContext &ValCtx) { if (ValCtx.isLibProfile) { for (Function &F : DM.GetModule()->functions()) { if (DM.HasDxilEntryProps(&F)) { - DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(&F); EntryStatus &Status = ValCtx.GetEntryStatus(&F); - ValidateEntryProps(ValCtx, entryProps, Status, &F); - ValidatePassThruHS(ValCtx, entryProps, &F); + ValidateEntryProps(ValCtx, EntryProps, Status, &F); + ValidatePassThruHS(ValCtx, EntryProps, &F); } } } else { @@ -5556,33 +5628,33 @@ static void ValidateShaderState(ValidationContext &ValCtx) { return; } EntryStatus &Status = ValCtx.GetEntryStatus(Entry); - DxilEntryProps &entryProps = DM.GetDxilEntryProps(Entry); - ValidateEntryProps(ValCtx, entryProps, Status, Entry); - ValidatePassThruHS(ValCtx, entryProps, Entry); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(Entry); + ValidateEntryProps(ValCtx, EntryProps, Status, Entry); + ValidatePassThruHS(ValCtx, EntryProps, Entry); } } static CallGraphNode * -CalculateCallDepth(CallGraphNode *node, - std::unordered_map &depthMap, - std::unordered_set &callStack, - std::unordered_set &funcSet) { - unsigned depth = callStack.size(); - funcSet.insert(node->getFunction()); - for (auto it = node->begin(), ei = node->end(); it != ei; it++) { - CallGraphNode *toNode = it->second; - if (callStack.insert(toNode).second == false) { 
+CalculateCallDepth(CallGraphNode *Node, + std::unordered_map &DepthMap, + std::unordered_set &CallStack, + std::unordered_set &FuncSet) { + unsigned Depth = CallStack.size(); + FuncSet.insert(Node->getFunction()); + for (auto It = Node->begin(), EIt = Node->end(); It != EIt; It++) { + CallGraphNode *ToNode = It->second; + if (CallStack.insert(ToNode).second == false) { // Recursive. - return toNode; + return ToNode; } - if (depthMap[toNode] < depth) - depthMap[toNode] = depth; + if (DepthMap[ToNode] < Depth) + DepthMap[ToNode] = Depth; if (CallGraphNode *N = - CalculateCallDepth(toNode, depthMap, callStack, funcSet)) { + CalculateCallDepth(ToNode, DepthMap, CallStack, FuncSet)) { // Recursive return N; } - callStack.erase(toNode); + CallStack.erase(ToNode); } return nullptr; @@ -5592,29 +5664,29 @@ static void ValidateCallGraph(ValidationContext &ValCtx) { // Build CallGraph. CallGraph &CG = ValCtx.GetCallGraph(); - std::unordered_map depthMap; - std::unordered_set callStack; - CallGraphNode *entryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; - depthMap[entryNode] = 0; - if (CallGraphNode *N = CalculateCallDepth(entryNode, depthMap, callStack, + std::unordered_map DepthMap; + std::unordered_set CallStack; + CallGraphNode *EntryNode = CG[ValCtx.DxilMod.GetEntryFunction()]; + DepthMap[EntryNode] = 0; + if (CallGraphNode *N = CalculateCallDepth(EntryNode, DepthMap, CallStack, ValCtx.entryFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), ValidationRule::FlowNoRecursion); if (ValCtx.DxilMod.GetShaderModel()->IsHS()) { - CallGraphNode *patchConstantNode = + CallGraphNode *PatchConstantNode = CG[ValCtx.DxilMod.GetPatchConstantFunction()]; - depthMap[patchConstantNode] = 0; - callStack.clear(); + DepthMap[PatchConstantNode] = 0; + CallStack.clear(); if (CallGraphNode *N = - CalculateCallDepth(patchConstantNode, depthMap, callStack, + CalculateCallDepth(PatchConstantNode, DepthMap, CallStack, ValCtx.patchConstFuncCallSet)) ValCtx.EmitFnError(N->getFunction(), 
ValidationRule::FlowNoRecursion); } } static void ValidateFlowControl(ValidationContext &ValCtx) { - bool reducible = + bool Reducible = IsReducible(*ValCtx.DxilMod.GetModule(), IrreducibilityAction::Ignore); - if (!reducible) { + if (!Reducible) { ValCtx.EmitError(ValidationRule::FlowReducible); return; } @@ -5629,28 +5701,28 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { DominatorTree DT = DTA.run(F); LoopInfo LI; LI.Analyze(DT); - for (auto loopIt = LI.begin(); loopIt != LI.end(); loopIt++) { - Loop *loop = *loopIt; - SmallVector exitBlocks; - loop->getExitBlocks(exitBlocks); - if (exitBlocks.empty()) + for (auto LoopIt = LI.begin(); LoopIt != LI.end(); LoopIt++) { + Loop *Loop = *LoopIt; + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + if (ExitBlocks.empty()) ValCtx.EmitFnError(&F, ValidationRule::FlowDeadLoop); } // validate that there is no use of a value that has been output-completed // for this function. - hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); - for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { - Function *pF = it.second; + for (auto &It : HlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function *pF = It.second; if (!pF) continue; // first, collect all the output complete calls that are not dominated // by another OutputComplete call for the same handle value llvm::SmallMapVector, 4> - handleToCI; + HandleToCI; for (User *U : pF->users()) { // all OutputComplete calls are instructions, and call instructions, // so there shouldn't need to be a null check. 
@@ -5662,33 +5734,33 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { continue; DxilInst_OutputComplete OutputComplete(CI); - Value *completedRecord = OutputComplete.get_output(); + Value *CompletedRecord = OutputComplete.get_output(); - auto vIt = handleToCI.find(completedRecord); - if (vIt == handleToCI.end()) { + auto vIt = HandleToCI.find(CompletedRecord); + if (vIt == HandleToCI.end()) { llvm::SmallPtrSet s; s.insert(CI); - handleToCI.insert(std::make_pair(completedRecord, s)); + HandleToCI.insert(std::make_pair(CompletedRecord, s)); } else { // if the handle is already in the map, make sure the map's set of // output complete calls that dominate the handle and do not dominate // each other gets updated if necessary bool CI_is_dominated = false; - for (auto ocIt = vIt->second.begin(); ocIt != vIt->second.end();) { + for (auto OcIt = vIt->second.begin(); OcIt != vIt->second.end();) { // if our new OC CI dominates an OC instruction in the set, // then replace the instruction in the set with the new OC CI. - if (DT.dominates(CI, *ocIt)) { - auto cur_it = ocIt++; + if (DT.dominates(CI, *OcIt)) { + auto cur_it = OcIt++; vIt->second.erase(*cur_it); continue; } // Remember if our new CI gets dominated by any CI in the set. 
- if (DT.dominates(*ocIt, CI)) { + if (DT.dominates(*OcIt, CI)) { CI_is_dominated = true; break; } - ocIt++; + OcIt++; } // if no CI in the set dominates our new CI, // the new CI should be added to the set @@ -5697,14 +5769,14 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { } } - for (auto handle_iter = handleToCI.begin(), e = handleToCI.end(); + for (auto handle_iter = HandleToCI.begin(), e = HandleToCI.end(); handle_iter != e; handle_iter++) { for (auto user_itr = handle_iter->first->user_begin(); user_itr != handle_iter->first->user_end(); user_itr++) { User *pU = *user_itr; - Instruction *useInstr = cast(pU); - if (useInstr) { - if (CallInst *CI = dyn_cast(useInstr)) { + Instruction *UseInstr = cast(pU); + if (UseInstr) { + if (CallInst *CI = dyn_cast(UseInstr)) { // if the user is an output complete call that is in the set of // OutputComplete calls not dominated by another OutputComplete // call for the same handle value, no diagnostics need to be @@ -5715,15 +5787,15 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { // make sure any output complete call in the set // that dominates this use gets its diagnostic emitted. 
- for (auto ocIt = handle_iter->second.begin(); - ocIt != handle_iter->second.end(); ocIt++) { - Instruction *ocInstr = cast(*ocIt); - if (DT.dominates(ocInstr, useInstr)) { + for (auto OcIt = handle_iter->second.begin(); + OcIt != handle_iter->second.end(); OcIt++) { + Instruction *OcInstr = cast(*OcIt); + if (DT.dominates(OcInstr, UseInstr)) { ValCtx.EmitInstrError( - useInstr, + UseInstr, ValidationRule::InstrNodeRecordHandleUseAfterComplete); ValCtx.EmitInstrNote( - *ocIt, "record handle invalidated by OutputComplete"); + *OcIt, "record handle invalidated by OutputComplete"); break; } } @@ -5739,57 +5811,57 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { static void ValidateUninitializedOutput(ValidationContext &ValCtx, Function *F) { DxilModule &DM = ValCtx.DxilMod; - DxilEntryProps &entryProps = DM.GetDxilEntryProps(F); + DxilEntryProps &EntryProps = DM.GetDxilEntryProps(F); EntryStatus &Status = ValCtx.GetEntryStatus(F); - const DxilFunctionProps &props = entryProps.props; + const DxilFunctionProps &Props = EntryProps.props; // For HS only need to check Tessfactor which is in patch constant sig. - if (props.IsHS()) { - std::vector &patchConstOrPrimCols = Status.patchConstOrPrimCols; - const DxilSignature &patchConstSig = - entryProps.sig.PatchConstOrPrimSignature; - for (auto &E : patchConstSig.GetElements()) { - unsigned mask = patchConstOrPrimCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + if (Props.IsHS()) { + std::vector &PatchConstOrPrimCols = Status.patchConstOrPrimCols; + const DxilSignature &PatchConstSig = + EntryProps.sig.PatchConstOrPrimSignature; + for (auto &E : PatchConstSig.GetElements()) { + unsigned Mask = PatchConstOrPrimCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. 
- if (mask != requireMask && !E->GetSemantic()->IsArbitrary()) { + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary()) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } return; } - const DxilSignature &outSig = entryProps.sig.OutputSignature; - std::vector &outputCols = Status.outputCols; - for (auto &E : outSig.GetElements()) { - unsigned mask = outputCols[E->GetID()]; - unsigned requireMask = (1 << E->GetCols()) - 1; + const DxilSignature &OutSig = EntryProps.sig.OutputSignature; + std::vector &OutputCols = Status.outputCols; + for (auto &E : OutSig.GetElements()) { + unsigned Mask = OutputCols[E->GetID()]; + unsigned RequireMask = (1 << E->GetCols()) - 1; // TODO: check other case uninitialized output is allowed. - if (mask != requireMask && !E->GetSemantic()->IsArbitrary() && + if (Mask != RequireMask && !E->GetSemantic()->IsArbitrary() && E->GetSemantic()->GetKind() != Semantic::Kind::Target) { ValCtx.EmitFnFormatError(F, ValidationRule::SmUndefinedOutput, {E->GetName()}); } } - if (!props.IsGS()) { - unsigned posMask = Status.OutputPositionMask[0]; - if (posMask != 0xf && Status.hasOutputPosition[0]) { + if (!Props.IsGS()) { + unsigned PosMask = Status.OutputPositionMask[0]; + if (PosMask != 0xf && Status.hasOutputPosition[0]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } else { - const auto &GS = props.ShaderProps.GS; - unsigned streamMask = 0; - for (size_t i = 0; i < _countof(GS.streamPrimitiveTopologies); ++i) { - if (GS.streamPrimitiveTopologies[i] != + const auto &GS = Props.ShaderProps.GS; + unsigned StreamMask = 0; + for (size_t I = 0; I < _countof(GS.streamPrimitiveTopologies); ++I) { + if (GS.streamPrimitiveTopologies[I] != DXIL::PrimitiveTopology::Undefined) { - streamMask |= 1 << i; + StreamMask |= 1 << I; } } - for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) { - if (streamMask & (1 << i)) { - unsigned posMask = Status.OutputPositionMask[i]; - if (posMask != 0xf && 
Status.hasOutputPosition[i]) { + for (unsigned I = 0; I < DXIL::kNumOutputStreams; I++) { + if (StreamMask & (1 << I)) { + unsigned PosMask = Status.OutputPositionMask[I]; + if (PosMask != 0xf && Status.hasOutputPosition[I]) { ValCtx.EmitFnError(F, ValidationRule::SmCompletePosition); } } diff --git a/lib/HLSL/CMakeLists.txt b/lib/HLSL/CMakeLists.txt index 947fc4c14f..21bb9523a7 100644 --- a/lib/HLSL/CMakeLists.txt +++ b/lib/HLSL/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_library(LLVMHLSL DxilNoops.cpp DxilPreserveAllOutputs.cpp DxilRenameResourcesPass.cpp + DxilScalarizeVectorLoadStores.cpp DxilSimpleGVNHoist.cpp DxilSignatureValidation.cpp DxilTargetLowering.cpp diff --git a/lib/HLSL/DxilCondenseResources.cpp b/lib/HLSL/DxilCondenseResources.cpp index 82d5e14d00..529c203bdc 100644 --- a/lib/HLSL/DxilCondenseResources.cpp +++ b/lib/HLSL/DxilCondenseResources.cpp @@ -2061,7 +2061,8 @@ void DxilLowerCreateHandleForLib::ReplaceResourceUserWithHandle( }; // Search all users for update counter - bool updateAnnotateHandle = res.IsGloballyCoherent(); + bool updateAnnotateHandle = + res.IsGloballyCoherent() || res.IsReorderCoherent(); if (!res.HasCounter()) { for (User *U : handle->users()) { if (IsDxilOp(U, hlsl::OP::OpCode::BufferUpdateCounter)) { @@ -2321,6 +2322,7 @@ void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) { pDest->SetSampleCount(0); pDest->SetElementStride(0); pDest->SetGloballyCoherent(false); + pDest->SetReorderCoherent(false); pDest->SetHasCounter(false); pDest->SetRW(false); pDest->SetROV(false); diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index 7d902a4ed7..c3a6ad7dfc 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -88,6 +88,7 @@ void InitResource(const DxilResource *pSource, DxilResource *pDest) { pDest->SetSampleCount(pSource->GetSampleCount()); pDest->SetElementStride(pSource->GetElementStride()); pDest->SetGloballyCoherent(pSource->IsGloballyCoherent()); + 
pDest->SetReorderCoherent(pSource->IsReorderCoherent()); pDest->SetHasCounter(pSource->HasCounter()); pDest->SetRW(pSource->IsRW()); pDest->SetROV(pSource->IsROV()); diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index 68c83fc037..75d1bf78e9 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1247,6 +1247,10 @@ void DxilLinkJob::RunPreparePass(Module &M) { PM.add(createDxilReinsertNopsPass()); PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false)); + // If we need SROA and dynamicindexvector to array, + // do it early to allow following scalarization to go forward. + PM.add(createDxilScalarizeVectorLoadStoresPass()); + // Remove unused functions. PM.add(createDxilDeadFunctionEliminationPass()); @@ -1255,6 +1259,12 @@ void DxilLinkJob::RunPreparePass(Module &M) { // For static global handle. PM.add(createLowerStaticGlobalIntoAlloca()); + // Change dynamic indexing vector to array where vectors aren't + // supported, but might be there from the initial compile. + if (!pSM->IsSM69Plus()) + PM.add( + createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */)); + // Remove MultiDimArray from function call arg. 
PM.add(createMultiDimArrayToOneDimArrayPass()); diff --git a/lib/HLSL/DxilPatchShaderRecordBindings.cpp b/lib/HLSL/DxilPatchShaderRecordBindings.cpp index 1873dcbcc4..e07a41a5c0 100644 --- a/lib/HLSL/DxilPatchShaderRecordBindings.cpp +++ b/lib/HLSL/DxilPatchShaderRecordBindings.cpp @@ -341,6 +341,7 @@ unsigned int DxilPatchShaderRecordBindings::AddHandle( if (pHandle) { pHandle->SetGloballyCoherent(false); + pHandle->SetReorderCoherent(false); pHandle->SetHasCounter(false); pHandle->SetCompType(CompType::getF32()); // TODO: Need to handle all types } diff --git a/lib/HLSL/DxilScalarizeVectorLoadStores.cpp b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp new file mode 100644 index 0000000000..febcf32358 --- /dev/null +++ b/lib/HLSL/DxilScalarizeVectorLoadStores.cpp @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilScalarizeVectorLoadStores.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Lowers native vector load stores to potentially multiple scalar calls. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DXIL/DxilModule.h" +#include "dxc/HLSL/DxilGenerationPass.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +using namespace llvm; +using namespace hlsl; + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI); + +class DxilScalarizeVectorLoadStores : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilScalarizeVectorLoadStores() : ModulePass(ID) {} + + StringRef getPassName() const override { + return "DXIL scalarize vector load/stores"; + } + + bool runOnModule(Module &M) override { + DxilModule &DM = M.GetOrCreateDxilModule(); + // Shader Model 6.9 allows native vectors and doesn't need this pass. 
+ if (DM.GetShaderModel()->IsSM69Plus()) + return false; + + bool Changed = false; + + hlsl::OP *HlslOP = DM.GetOP(); + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorLoad)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorLoad(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + for (auto FIt : HlslOP->GetOpFuncList(DXIL::OpCode::RawBufferVectorStore)) { + Function *Func = FIt.second; + if (!Func) + continue; + for (auto U = Func->user_begin(), UE = Func->user_end(); U != UE;) { + CallInst *CI = cast(*(U++)); + scalarizeVectorStore(HlslOP, M.getDataLayout(), CI); + Changed = true; + } + } + return Changed; + } +}; + +static unsigned GetRawBufferMask(unsigned NumComponents) { + switch (NumComponents) { + case 0: + return 0; + case 1: + return DXIL::kCompMask_X; + case 2: + return DXIL::kCompMask_X | DXIL::kCompMask_Y; + case 3: + return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; + case 4: + default: + return DXIL::kCompMask_All; + } + return DXIL::kCompMask_All; +} + +static void scalarizeVectorLoad(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorLoad VecLd(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferLoad; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecLd.get_buf()); // Resource handle @1. + Args.emplace_back(VecLd.get_index()); // Index @2. + Args.emplace_back(VecLd.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Mask to be set later @4. + Args.emplace_back(VecLd.get_alignment()); // Alignment @5. + + // Set offset to increment depending on whether the real offset is defined. 
+ unsigned OffsetIdx; + if (isa(VecLd.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + StructType *ResRetTy = cast(CI->getType()); + Type *Ty = ResRetTy->getElementType(0); + unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + + const unsigned MaxElemCount = 4; + SmallVector Elts(NumComponents); + Value *Ld = nullptr; + for (unsigned EIx = 0; EIx < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + HlslOP->GetI8Const(GetRawBufferMask(ChunkSize)); + // If we've loaded a chunk already, update offset to next chunk. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(OpCode)); + for (unsigned ChIx = 0; ChIx < ChunkSize; ChIx++, EIx++) + Elts[EIx] = Builder.CreateExtractValue(Ld, ChIx); + } + + Value *RetValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned ElIx = 0; ElIx < NumComponents; ElIx++) + RetValNew = Builder.CreateInsertElement(RetValNew, Elts[ElIx], ElIx); + + // Replace users of the vector extracted from the vector load resret. + Value *Status = nullptr; + for (auto CU = CI->user_begin(), CE = CI->user_end(); CU != CE;) { + auto EV = cast(*(CU++)); + unsigned Ix = EV->getIndices()[0]; + if (Ix == 0) { + // Handle value uses. + EV->replaceAllUsesWith(RetValNew); + } else if (Ix == 1) { + // Handle status uses. 
+ if (!Status) + Status = Builder.CreateExtractValue(Ld, DXIL::kResRetStatusIndex); + EV->replaceAllUsesWith(Status); + } + EV->eraseFromParent(); + } + CI->eraseFromParent(); +} + +static void scalarizeVectorStore(hlsl::OP *HlslOP, const DataLayout &DL, + CallInst *CI) { + IRBuilder<> Builder(CI); + // Collect the information required to break this into scalar ops from args. + DxilInst_RawBufferVectorStore VecSt(CI); + OP::OpCode OpCode = OP::OpCode::RawBufferStore; + llvm::Constant *OpArg = Builder.getInt32((unsigned)OpCode); + SmallVector Args; + Args.emplace_back(OpArg); // opcode @0. + Args.emplace_back(VecSt.get_uav()); // Resource handle @1. + Args.emplace_back(VecSt.get_index()); // Index @2. + Args.emplace_back(VecSt.get_elementOffset()); // Offset @3. + Args.emplace_back(nullptr); // Val0 to be set later @4. + Args.emplace_back(nullptr); // Val1 to be set later @5. + Args.emplace_back(nullptr); // Val2 to be set later @6. + Args.emplace_back(nullptr); // Val3 to be set later @7. + Args.emplace_back(nullptr); // Mask to be set later @8. + Args.emplace_back(VecSt.get_alignment()); // Alignment @9. + + // Set offset to increment depending on whether the real offset is defined. + unsigned OffsetIdx; + if (isa(VecSt.get_elementOffset())) + // Byte Address Buffers can't use offset, so use index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + Value *VecVal = VecSt.get_value0(); + + const unsigned MaxElemCount = 4; + Type *Ty = VecVal->getType(); + const unsigned NumComponents = Ty->getVectorNumElements(); + Type *EltTy = Ty->getScalarType(); + Value *UndefVal = UndefValue::get(EltTy); + unsigned EltSize = DL.getTypeAllocSize(EltTy); + Function *F = HlslOP->GetOpFunc(OpCode, EltTy); + for (unsigned EIx = 0; EIx < NumComponents;) { + // Store 4 elements or however many less than 4 are left to store. 
+ unsigned ChunkSize = std::min(NumComponents - EIx, MaxElemCount); + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. + if (EIx > 0) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], HlslOP->GetU32Const(4 * EltSize)); + // Populate all value arguments either with the vector or undefs. + uint8_t Mask = 0; + unsigned ChIx = 0; + for (; ChIx < ChunkSize; ChIx++, EIx++) { + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = + Builder.CreateExtractElement(VecVal, EIx); + Mask |= (1 << ChIx); + } + for (; ChIx < MaxElemCount; ChIx++) + Args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx + ChIx] = UndefVal; + + Args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] = + HlslOP->GetU8Const(Mask); + Builder.CreateCall(F, Args); + } + CI->eraseFromParent(); +} + +char DxilScalarizeVectorLoadStores::ID = 0; + +ModulePass *llvm::createDxilScalarizeVectorLoadStoresPass() { + return new DxilScalarizeVectorLoadStores(); +} + +INITIALIZE_PASS(DxilScalarizeVectorLoadStores, + "hlsl-dxil-scalarize-vector-load-stores", + "DXIL scalarize vector load/stores", false, false) diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp index 93ba3b9816..db20d8a324 100644 --- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp +++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp @@ -76,6 +76,7 @@ Type *TryLowerMatTy(Type *Ty) { } class MatrixBitcastLowerPass : public FunctionPass { + bool SupportsVectors = false; public: static char ID; // Pass identification, replacement for typeid @@ -83,6 +84,9 @@ class MatrixBitcastLowerPass : public FunctionPass { StringRef getPassName() const override { return "Matrix Bitcast lower"; } bool runOnFunction(Function &F) override { + DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); + SupportsVectors = DM.GetShaderModel()->IsSM69Plus(); + bool bUpdated = false; std::unordered_set matCastSet; for (auto blkIt = F.begin(); blkIt != F.end(); ++blkIt) { @@ -100,7 +104,6 @@ 
class MatrixBitcastLowerPass : public FunctionPass { } } - DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); // Remove bitcast which has CallInst user. if (DM.GetShaderModel()->IsLib()) { for (auto it = matCastSet.begin(); it != matCastSet.end();) { @@ -185,7 +188,7 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { User *U = *(it++); if (GetElementPtrInst *GEP = dyn_cast(U)) { Type *EltTy = GEP->getType()->getPointerElementType(); - if (HLMatrixType::isa(EltTy)) { + if (HLMatrixType MatTy = HLMatrixType::dyn_cast(EltTy)) { // Change gep matrixArray, 0, index // into // gep oneDimArray, 0, index * matSize @@ -193,10 +196,11 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { SmallVector idxList(GEP->idx_begin(), GEP->idx_end()); DXASSERT(idxList.size() == 2, "else not one dim matrix array index to matrix"); - - HLMatrixType MatTy = HLMatrixType::cast(EltTy); - Value *matSize = Builder.getInt32(MatTy.getNumElements()); - idxList.back() = Builder.CreateMul(idxList.back(), matSize); + unsigned NumElts = MatTy.getNumElements(); + if (!SupportsVectors || NumElts == 1) { + Value *MatSize = Builder.getInt32(NumElts); + idxList.back() = Builder.CreateMul(idxList.back(), MatSize); + } Value *NewGEP = Builder.CreateGEP(A, idxList); lowerMatrix(GEP, NewGEP); DXASSERT(GEP->user_empty(), "else lower matrix fail"); @@ -211,13 +215,23 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { } else if (LoadInst *LI = dyn_cast(U)) { if (VectorType *Ty = dyn_cast(LI->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - Value *NewVec = UndefValue::get(LI->getType()); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateLoad(GEP); - NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + Value *NewVec = nullptr; + unsigned VecSize = Ty->getVectorNumElements(); + if (SupportsVectors 
&& VecSize > 1) { + // Create a replacement load using the vector pointer. + Instruction *NewLd = LI->clone(); + unsigned VecIdx = NewLd->getNumOperands() - 1; + NewLd->setOperand(VecIdx, A); + Builder.Insert(NewLd); + NewVec = NewLd; + } else { + Value *zeroIdx = Builder.getInt32(0); + NewVec = UndefValue::get(LI->getType()); + for (unsigned i = 0; i < VecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateLoad(GEP); + NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + } } LI->replaceAllUsesWith(NewVec); LI->eraseFromParent(); @@ -228,12 +242,20 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { Value *V = ST->getValueOperand(); if (VectorType *Ty = dyn_cast(V->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - for (unsigned i = 0; i < vecSize; i++) { - Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateExtractElement(V, i); - Builder.CreateStore(Elt, GEP); + if (SupportsVectors && Ty->getVectorNumElements() > 1) { + // Create a replacement store using the vector pointer. + Instruction *NewSt = ST->clone(); + unsigned VecIdx = NewSt->getNumOperands() - 1; + NewSt->setOperand(VecIdx, A); + Builder.Insert(NewSt); + } else { + Value *zeroIdx = Builder.getInt32(0); + unsigned vecSize = Ty->getNumElements(); + for (unsigned i = 0; i < vecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateExtractElement(V, i); + Builder.CreateStore(Elt, GEP); + } } ST->eraseFromParent(); } else { diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index 037885c9d8..bab6e23a30 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -604,6 +604,9 @@ MDTuple *HLModule::EmitHLResources() { void HLModule::LoadHLResources(const llvm::MDOperand &MDO) { const llvm::MDTuple *pSRVs, *pUAVs, *pCBuffers, *pSamplers; + // No resources. Nothing to do. 
+ if (MDO.get() == nullptr) + return; m_pMDHelper->GetDxilResources(MDO, pSRVs, pUAVs, pCBuffers, pSamplers); // Load SRV records. @@ -697,6 +700,7 @@ HLModule::AddResourceWithGlobalVariableAndProps(llvm::Constant *GV, Res->SetRW(true); Res->SetROV(RP.Basic.IsROV); Res->SetGloballyCoherent(RP.Basic.IsGloballyCoherent); + Res->SetReorderCoherent(RP.Basic.IsReorderCoherent); Res->SetHasCounter(RP.Basic.SamplerCmpOrHasCounter); Res->SetKind(RK); Res->SetGlobalSymbol(GV); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index bc293357d6..be45021e41 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,8 +7,12 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilConstants.h" #define _USE_MATH_DEFINES #include #include @@ -421,6 +425,14 @@ struct IntrinsicLower { // IOP intrinsics. namespace { +// Creates the necessary scalar calls to for a "trivial" operation where only +// call instructions to a single function type are needed. +// The overload type `Ty` determines what scalarization might be required. +// Elements of any vectors in `refArgs` are extracted into scalars for each +// call generated while the same scalar values are used unaltered in each call. +// Utility objects `HlslOp` and `Builder` are used to generate calls to the +// given `DxilFunc` for each set of scalar arguments. +// The results are reconstructed into the given `RetTy` as needed. 
Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { @@ -456,12 +468,40 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, } } } -// Generates a DXIL operation over an overloaded type (Ty), returning a -// RetTy value; when Ty is a vector, it will replicate per-element operations -// into RetTy to rebuild it. + +// Creates a native vector call to for a "trivial" operation where only a single +// call instruction is needed. The overload and return types are the same vector +// type `Ty`. +// Utility objects `HlslOp` and `Builder` are used to create a call to the given +// `DxilFunc` with `RefArgs` arguments. +Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, + ArrayRef Args, Type *Ty, OP *OP, + IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) + return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); + return Builder.CreateCall(Func, Args); // Cannot add name to void. +} + +// Generates a DXIL operation with the overloaded type based on `Ty` and return +// type `RetTy`. When Ty is a vector, it will either generate per-element calls +// for each vector element and reconstruct the vector type from those results or +// operate on and return native vectors depending on vector size and the +// legality of the vector overload. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, Type *Ty, Type *RetTy, OP *hlslOP, IRBuilder<> &Builder) { + + // If supported and the overload type is a vector with more than 1 element, + // create a native vector operation. 
+ if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + OP::IsOverloadLegal(opcode, Ty)) { + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, + Builder); + } + + // Set overload type to the scalar type of `Ty` and generate call(s). Type *EltTy = Ty->getScalarType(); Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); @@ -481,20 +521,34 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } -Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *Ty = src->getType(); +// Translate call that converts to a dxil unary operation with a different +// return type from the overload by passing the argument, explicit return type, +// and helper objects to the scalarizing unary dxil operation creation. 
+Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *, + bool &Translated) { + Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = Src->getType(); - Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); - Value *args[] = {opArg, src}; + IRBuilder<> Builder(CI); + hlsl::OP *OP = &Helper.hlslOP; + Type *RetTy = CI->getType(); + Constant *OpArg = OP->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; - return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); + return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder); } -Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP, - Builder); +Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op, + IRBuilder<> &Builder) { + Type *Ty = Src->getType(); + + Constant *OpArg = Op->GetU32Const((unsigned)OpCode); + Value *Args[] = {OpArg, Src}; + + return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder); } Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, @@ -518,6 +572,9 @@ Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } +// Translate call that trivially converts to a dxil unary operation by passing +// argument, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. 
Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -525,11 +582,13 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); IRBuilder<> Builder(CI); hlsl::OP *hlslOP = &helper.hlslOP; - Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), - hlslOP, Builder); - return retVal; + + return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder); } +// Translate call that trivially converts to a dxil binary operation by passing +// arguments, return type, and helper objects to either scalarizing or native +// vector dxil operation creation depending on version and vector size. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -544,6 +603,10 @@ Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } +// Translate call that trivially converts to a dxil trinary (aka tertiary) +// operation by passing arguments, return type, and helper objects to either +// scalarizing or native vector dxil operation creation depending on version +// and vector size. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -735,6 +798,12 @@ bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, } } + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. + Type *Ty = x->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 16) + return false; + APFloat powAPF = isa(pow) ? 
cast(pow)->getElementAsAPFloat(0) : // should be a splat value @@ -2016,7 +2085,7 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitHi = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi); IRBuilder<> Builder(CI); Constant *neg1 = Builder.getInt32(-1); @@ -2049,7 +2118,7 @@ Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *firstbitLo = - TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated); + TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated); return firstbitLo; } @@ -2428,17 +2497,22 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { - auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. 
+Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, + IRBuilder<> &Builder, + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); - Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, + Value *Result; + if (Elt0->getType()->isFloatingPointTy()) + Result = Builder.CreateFMul(Elt0, Elt1); + else + Result = Builder.CreateMul(Elt0, Elt1); + for (unsigned Elt = 1; Elt < vecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(arg0, Elt); + Elt1 = Builder.CreateExtractElement(arg1, Elt); + Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, Builder); } @@ -2477,12 +2551,16 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { + Type *EltTy = Ty->getScalarType(); + if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); - } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_udot); - } + + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_udot) + MadOpCode = DXIL::OpCode::UMad; + else if (EltTy->isFloatingPointTy()) + MadOpCode = DXIL::OpCode::FMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2985,23 +3063,6 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, return retVal; } -static Value *ScalarizeElements(Type 
*RetTy, ArrayRef Elts, - IRBuilder<> &Builder) { - // Extract value part. - Value *retVal = llvm::UndefValue::get(RetTy); - if (RetTy->isVectorTy()) { - unsigned vecSize = RetTy->getVectorNumElements(); - DXASSERT(vecSize <= Elts.size(), "vector size mismatch"); - for (unsigned i = 0; i < vecSize; i++) { - Value *retComp = Elts[i]; - retVal = Builder.CreateInsertElement(retVal, retComp, i); - } - } else { - retVal = Elts[0]; - } - return retVal; -} - void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, hlsl::OP *hlslOp) { if (status && !isa(status)) { @@ -3046,8 +3107,10 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (arg0Ty->getScalarType()->isFloatingPointTy()) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, - IOP == IntrinsicOp::IOP_umul); + DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; + if (IOP == IntrinsicOp::IOP_umul) + MadOpCode = DXIL::OpCode::UMad; + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); } } else { // mul(vector, scalar) == vector * scalar-splat @@ -3941,14 +4004,41 @@ TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // Load/Store intrinsics. +OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) { + switch (RK) { + case DxilResource::Kind::RawBuffer: + case DxilResource::Kind::StructuredBuffer: + return OP::OpCode::RawBufferLoad; + case DxilResource::Kind::TypedBuffer: + return OP::OpCode::BufferLoad; + case DxilResource::Kind::Invalid: + DXASSERT(0, "invalid resource kind"); + break; + default: + return OP::OpCode::TextureLoad; + } + return OP::OpCode::TextureLoad; +} + struct ResLoadHelper { + // Default constructor uses CI load intrinsic call + // to get the retval and various location indicators. ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, - Value *h, IntrinsicOp IOP, bool bForSubscript = false); - // For double subscript. 
- ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip) - : opcode(OP::OpCode::TextureLoad), - intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst), - addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {} + Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr); + // Alternative constructor explicitly sets the index. + // Used for some subscript operators that feed the generic HL call inst + // into a load op and by the matrixload call instruction. + ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, + Value *Offset, Value *mip = nullptr) + : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), + addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + opcode = LoadOpFromResKind(RK); + Type *Ty = Inst->getType(); + if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && + Ty->getVectorNumElements() > 1 && + Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; + } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; unsigned dxilMajor; @@ -3961,122 +4051,93 @@ struct ResLoadHelper { Value *mipLevel; }; +// Uses CI arguments to determine the index, offset, and mipLevel also depending +// on the RK/RC resource kind and class, which determine the opcode. +// Handle and IOP are set explicitly. +// For typed buffer loads, the call instruction feeds into a load +// represented by TyBufSubLoad which determines the instruction to replace. +// Otherwise, CI is replaced. 
ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, Value *hdl, - IntrinsicOp IOP, bool bForSubscript) + IntrinsicOp IOP, LoadInst *TyBufSubLoad) : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) { - switch (RK) { - case DxilResource::Kind::RawBuffer: - case DxilResource::Kind::StructuredBuffer: - opcode = OP::OpCode::RawBufferLoad; - break; - case DxilResource::Kind::TypedBuffer: - opcode = OP::OpCode::BufferLoad; - break; - case DxilResource::Kind::Invalid: - DXASSERT(0, "invalid resource kind"); - break; - default: - opcode = OP::OpCode::TextureLoad; - break; - } - retVal = CI; + opcode = LoadOpFromResKind(RK); + bool bForSubscript = false; + if (TyBufSubLoad) { + bForSubscript = true; + retVal = TyBufSubLoad; + } else + retVal = CI; const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx; addr = CI->getArgOperand(kAddrIdx); unsigned argc = CI->getNumArgOperands(); + Type *i32Ty = Type::getInt32Ty(CI->getContext()); + unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; + unsigned OffsetIdx = HLOperandIndex::kInvalidIdx; if (opcode == OP::OpCode::TextureLoad) { - // mip at last channel - unsigned coordSize = DxilResource::GetNumCoords(RK); - - if (RC == DxilResourceBase::Class::SRV) { - if (bForSubscript) { - // Use 0 when access by []. - mipLevel = IRBuilder<>(CI).getInt32(0); - } else { - if (coordSize == 1 && !addr->getType()->isVectorTy()) { - // Use addr when access by Load. - mipLevel = addr; - } else { - mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize); - } - } - } else { - // Set mip level to undef for UAV. 
- mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext())); - } - - if (RC == DxilResourceBase::Class::SRV) { - unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx; - unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx; - if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) { - offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx; - statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + bool IsMS = (RK == DxilResource::Kind::Texture2DMS || + RK == DxilResource::Kind::Texture2DMSArray); + // Set mip and status index. + offset = UndefValue::get(i32Ty); + if (IsMS) { + // Retrieve appropriate MS parameters. + StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; + // MS textures keep the sample param (mipLevel) regardless of writability. + if (bForSubscript) + mipLevel = ConstantInt::get(i32Ty, 0); + else mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - } - - if (argc > offsetIdx) - offset = CI->getArgOperand(offsetIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - } else if (RC == DxilResourceBase::Class::UAV && - (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray)) { - unsigned statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; - mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); - - if (argc > statusIdx) - status = CI->getArgOperand(statusIdx); - + } else if (RC == DxilResourceBase::Class::UAV) { + // DXIL requires that non-MS UAV accesses set miplevel to undef. + mipLevel = UndefValue::get(i32Ty); + StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; } else { - const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; - - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); + // Non-MS SRV case. + StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx; + if (bForSubscript) + // Having no miplevel param, single subscripted SRVs default to 0. 
+ mipLevel = ConstantInt::get(i32Ty, 0); + else + // Mip is stored at the last channel of the coordinate vector. + mipLevel = IRBuilder<>(CI).CreateExtractElement( + addr, DxilResource::GetNumCoords(RK)); } - } else { - const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; - if (argc > kStatusIdx) - status = CI->getArgOperand(kStatusIdx); - } + if (RC == DxilResourceBase::Class::SRV) + OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx + : HLOperandIndex::kTexLoadOffsetOpIdx; + } else if (opcode == OP::OpCode::RawBufferLoad) { + // If native vectors are available and this load had a vector + // with more than one elements, convert the RawBufferLod to the + // native vector variant RawBufferVectorLoad. + Type *Ty = CI->getType(); + if (Ty->isVectorTy() && Ty->getVectorNumElements() > 1 && + CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; + } + + // Set offset. + if (DXIL::IsStructuredBuffer(RK)) + // Structured buffers receive no exterior offset in this constructor, + // but may need to increment it later. + offset = ConstantInt::get(i32Ty, 0U); + else if (argc > OffsetIdx) + // Textures may set the offset from an explicit argument. + offset = CI->getArgOperand(OffsetIdx); + else + // All other cases use undef. + offset = UndefValue::get(i32Ty); + + // Retrieve status value if provided. 
+ if (argc > StatusIdx) + status = CI->getArgOperand(StatusIdx); } void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL); -// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi } -void Make64bitResultForLoad(Type *EltTy, ArrayRef resultElts32, - unsigned size, MutableArrayRef resultElts, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - if (EltTy == doubleTy) { - Function *makeDouble = - hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy); - Value *makeDoubleOpArg = - Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); - resultElts[i] = V; - } - } else { - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - lo = Builder.CreateZExt(lo, i64Ty); - hi = Builder.CreateZExt(hi, i64Ty); - hi = Builder.CreateShl(hi, 32); - resultElts[i] = Builder.CreateOr(lo, hi); - } - } -} - static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) { unsigned mask = 0; @@ -4108,183 +4169,208 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment); -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned VecElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy = false); - -void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK, - IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) { - - Type *Ty = helper.retVal->getType(); - if (Ty->isPointerTy()) { - DXASSERT(!DxilResource::IsAnyTexture(RK), - "Textures should not 
be treated as structured buffers."); - TranslateStructBufSubscript(cast(helper.retVal), helper.handle, - helper.status, OP, RK, DL); - return; - } - +// Sets up arguments for buffer load call. +static SmallVector GetBufLoadArgs(ResLoadHelper helper, + HLResource::Kind RK, + IRBuilder<> Builder, + unsigned LdSize) { OP::OpCode opcode = helper.opcode; + llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); - Type *i32Ty = Builder.getInt32Ty(); - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - Type *EltTy = Ty->getScalarType(); - unsigned numComponents = 1; - if (Ty->isVectorTy()) { - numComponents = Ty->getVectorNumElements(); - } - - if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) { - std::vector bufLds; - const bool isBool = EltTy->isIntegerTy(1); - - // Bool are represented as i32 in memory - Type *MemReprTy = isBool ? Builder.getInt32Ty() : EltTy; - bool isScalarTy = !Ty->isVectorTy(); + unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U; + alignment = std::min(alignment, LdSize); + Constant *alignmentVal = Builder.getInt32(alignment); - Value *retValNew = nullptr; - if (DXIL::IsStructuredBuffer(RK)) { - retValNew = TranslateRawBufVecLd( - MemReprTy, numComponents, Builder, helper.handle, OP, helper.status, - helper.addr, OP->GetU32Const(0), DL, bufLds, - /*baseAlign (in bytes)*/ 8, isScalarTy); - } else { - retValNew = - TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, - OP, helper.status, nullptr, helper.addr, DL, - bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy); - } + // Assemble args specific to the type bab/struct/typed: + // - Typed needs to handle the possibility of vector coords + // - Raws need to calculate alignment and mask values. + SmallVector Args; + Args.emplace_back(opArg); // opcode @0. 
+ Args.emplace_back(helper.handle); // Resource handle @1 - DXASSERT_NOMSG(!bufLds.empty()); - dxilutil::MigrateDebugValue(helper.retVal, bufLds.front()); + // Set offsets appropriate for the load operation. + bool isVectorAddr = helper.addr->getType()->isVectorTy(); + if (opcode == OP::OpCode::TextureLoad) { + llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty()); - if (isBool) { - // Convert result back to register representation. - retValNew = Builder.CreateICmpNE( - retValNew, Constant::getNullValue(retValNew->getType())); + // Set mip level or sample for MS texutures @2. + Args.emplace_back(helper.mipLevel); + // Set texture coords according to resource kind @3-5 + // Coords unused by the resource kind are undefs. + unsigned coordSize = DxilResource::GetNumCoords(RK); + for (unsigned i = 0; i < 3; i++) + if (i < coordSize) + Args.emplace_back(isVectorAddr + ? Builder.CreateExtractElement(helper.addr, i) + : helper.addr); + else + Args.emplace_back(undefI); + + // Set texture offsets according to resource kind @7-9 + // Coords unused by the resource kind are undefs. + unsigned offsetSize = DxilResource::GetNumOffsets(RK); + if (!helper.offset || isa(helper.offset)) + offsetSize = 0; + for (unsigned i = 0; i < 3; i++) + if (i < offsetSize) + Args.emplace_back(Builder.CreateExtractElement(helper.offset, i)); + else + Args.emplace_back(undefI); + } else { + // If not TextureLoad, it could be a typed or raw buffer load. + // They have mostly similar arguments. + DXASSERT(opcode == OP::OpCode::RawBufferLoad || + opcode == OP::OpCode::RawBufferVectorLoad || + opcode == OP::OpCode::BufferLoad, + "Wrong opcode in get load args"); + Args.emplace_back( + isVectorAddr ? Builder.CreateExtractElement(helper.addr, (uint64_t)0) + : helper.addr); + Args.emplace_back(helper.offset); + if (opcode == OP::OpCode::RawBufferLoad) { + // Unlike typed buffer load, raw buffer load has mask and alignment. + Args.emplace_back(nullptr); // Mask will be added later %4. 
+ Args.emplace_back(alignmentVal); // alignment @5. + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // RawBufferVectorLoad takes just alignment, no mask. + Args.emplace_back(alignmentVal); // alignment @4 } - - helper.retVal->replaceAllUsesWith(retValNew); - helper.retVal = retValNew; - return; } + return Args; +} - bool isTyped = opcode == OP::OpCode::TextureLoad || - RK == DxilResource::Kind::TypedBuffer; - bool is64 = EltTy == i64Ty || EltTy == doubleTy; - if (is64 && isTyped) { - EltTy = i32Ty; - } - bool isBool = EltTy->isIntegerTy(1); - if (isBool) { - // Value will be loaded in its memory representation. - EltTy = i32Ty; - if (Ty->isVectorTy()) - Ty = VectorType::get(EltTy, numComponents); - } +// Emits as many calls as needed to load the full vector +// Performs any needed extractions and conversions of the results. +Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, + IRBuilder<> &Builder, hlsl::OP *OP, + const DataLayout &DL) { + OP::OpCode opcode = helper.opcode; + Type *Ty = helper.retVal->getType(); - Function *F = OP->GetOpFunc(opcode, EltTy); - llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); + unsigned NumComponents = 1; + if (Ty->isVectorTy()) + NumComponents = Ty->getVectorNumElements(); - llvm::Value *undefI = llvm::UndefValue::get(i32Ty); + const bool isTyped = DXIL::IsTyped(RK); + Type *EltTy = Ty->getScalarType(); + const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()); + const bool isBool = EltTy->isIntegerTy(1); + // Values will be loaded in memory representations. + if (isBool || (is64 && isTyped)) + EltTy = Builder.getInt32Ty(); - SmallVector loadArgs; - loadArgs.emplace_back(opArg); // opcode - loadArgs.emplace_back(helper.handle); // resource handle + // Calculate load size with the scalar memory element type. 
+ unsigned LdSize = DL.getTypeAllocSize(EltTy); - if (opcode == OP::OpCode::TextureLoad) { - // set mip level - loadArgs.emplace_back(helper.mipLevel); - } - - if (opcode == OP::OpCode::TextureLoad) { - // texture coord - unsigned coordSize = DxilResource::GetNumCoords(RK); - bool isVectorAddr = helper.addr->getType()->isVectorTy(); - for (unsigned i = 0; i < 3; i++) { - if (i < coordSize) { - loadArgs.emplace_back(isVectorAddr - ? Builder.CreateExtractElement(helper.addr, i) - : helper.addr); - } else - loadArgs.emplace_back(undefI); + // Adjust number of components as needed. + if (is64 && isTyped) { + // 64-bit types are stored as int32 pairs in typed buffers. + DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); + NumComponents *= 2; + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // Native vector loads only have a single vector element in ResRet. + EltTy = VectorType::get(EltTy, NumComponents); + NumComponents = 1; + } + + SmallVector Args = GetBufLoadArgs(helper, RK, Builder, LdSize); + + // Keep track of the first load for debug info migration. + Value *FirstLd = nullptr; + + unsigned OffsetIdx = 0; + if (RK == DxilResource::Kind::RawBuffer) + // Raw buffers can't use offset param. Add to coord index. + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; + else if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; + + // Create call(s) to function object and collect results in Elts. + // Typed buffer loads are limited to one load of up to 4 32-bit values. + // Raw buffer loads might need multiple loads in chunks of 4. + SmallVector Elts(NumComponents); + for (unsigned i = 0; i < NumComponents;) { + // Load 4 elements or however many less than 4 are left to load. + unsigned chunkSize = std::min(NumComponents - i, 4U); + + // Assign mask for raw buffer loads. 
+ if (opcode == OP::OpCode::RawBufferLoad) { + Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = + GetRawBufferMaskForETy(EltTy, chunkSize, OP); + // If we've loaded a chunk already, update offset to next chunk. + if (FirstLd != nullptr) + Args[OffsetIdx] = + Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); } - } else { - if (helper.addr->getType()->isVectorTy()) { - Value *scalarOffset = - Builder.CreateExtractElement(helper.addr, (uint64_t)0); - - // TODO: calculate the real address based on opcode - loadArgs.emplace_back(scalarOffset); // offset - } else { - // TODO: calculate the real address based on opcode + Function *F = OP->GetOpFunc(opcode, EltTy); + Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); - loadArgs.emplace_back(helper.addr); // offset - } - } - // offset 0 - if (opcode == OP::OpCode::TextureLoad) { - if (helper.offset && !isa(helper.offset)) { - unsigned offsetSize = DxilResource::GetNumOffsets(RK); - for (unsigned i = 0; i < 3; i++) { - if (i < offsetSize) - loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i)); - else - loadArgs.emplace_back(undefI); + // Extract elements from returned ResRet. + // Native vector loads just have one vector element in the ResRet. + // Others have up to four scalars that need to be individually extracted. + if (opcode == OP::OpCode::RawBufferVectorLoad) + Elts[i++] = Builder.CreateExtractValue(Ld, 0); + else + for (unsigned j = 0; j < chunkSize; j++, i++) + Elts[i] = Builder.CreateExtractValue(Ld, j); + + // Update status. + UpdateStatus(Ld, helper.status, Builder, OP); + + if (!FirstLd) + FirstLd = Ld; + } + DXASSERT(FirstLd, "No loads created by TranslateBufLoad"); + + // Convert loaded 32-bit integers to intended 64-bit type representation. 
+ if (isTyped) { + Type *RegEltTy = Ty->getScalarType(); + if (RegEltTy->isDoubleTy()) { + Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy); + Value *makeDoubleOpArg = + Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); + NumComponents /= 2; // Convert back to number of doubles. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); } - } else { - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); + EltTy = RegEltTy; + } else if (RegEltTy->isIntegerTy(64)) { + NumComponents /= 2; // Convert back to number of int64s. + for (unsigned i = 0; i < NumComponents; i++) { + Value *lo = Elts[2 * i]; + Value *hi = Elts[2 * i + 1]; + lo = Builder.CreateZExt(lo, RegEltTy); + hi = Builder.CreateZExt(hi, RegEltTy); + hi = Builder.CreateShl(hi, 32); + Elts[i] = Builder.CreateOr(lo, hi); + } + EltTy = RegEltTy; } } - // Offset 1 - if (RK == DxilResource::Kind::TypedBuffer) { - loadArgs.emplace_back(undefI); - } - - Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode)); - dxilutil::MigrateDebugValue(helper.retVal, ResRet); - + // Package elements into a vector as needed. Value *retValNew = nullptr; - if (!is64 || !isTyped) { - retValNew = ScalarizeResRet(Ty, ResRet, Builder); + // Scalar or native vector loads need not construct vectors from elements. 
+ if (!Ty->isVectorTy() || opcode == OP::OpCode::RawBufferVectorLoad) { + retValNew = Elts[0]; } else { - unsigned size = numComponents; - DXASSERT(size <= 2, "typed buffer only allow 4 dwords"); - EltTy = Ty->getScalarType(); - Value *Elts[2]; - - Make64bitResultForLoad(Ty->getScalarType(), - { - Builder.CreateExtractValue(ResRet, 0), - Builder.CreateExtractValue(ResRet, 1), - Builder.CreateExtractValue(ResRet, 2), - Builder.CreateExtractValue(ResRet, 3), - }, - size, Elts, OP, Builder); - - retValNew = ScalarizeElements(Ty, Elts, Builder); + retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); + for (unsigned i = 0; i < NumComponents; i++) + retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i); } - if (isBool) { - // Convert result back to register representation. + // Convert loaded int32 bool results to i1 register representation. + if (isBool) retValNew = Builder.CreateICmpNE( retValNew, Constant::getNullValue(retValNew->getType())); - } - // replace helper.retVal->replaceAllUsesWith(retValNew); - // Save new ret val. helper.retVal = retValNew; - // get status - UpdateStatus(ResRet, helper.status, Builder, OP); + + return FirstLd; } Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -4292,6 +4378,7 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; + DataLayout &DL = helper.dataLayout; Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); IRBuilder<> Builder(CI); @@ -4299,9 +4386,19 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, DXIL::ResourceClass RC = pObjHelper->GetRC(handle); DXIL::ResourceKind RK = pObjHelper->GetRK(handle); - ResLoadHelper loadHelper(CI, RK, RC, handle, IOP); - TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout); - // CI is replaced in TranslateLoad. 
+ ResLoadHelper ldHelper(CI, RK, RC, handle, IOP); + Type *Ty = CI->getType(); + Value *Ld = nullptr; + if (Ty->isPointerTy()) { + DXASSERT(!DxilResource::IsAnyTexture(RK), + "Textures should not be treated as structured buffers."); + TranslateStructBufSubscript(cast(ldHelper.retVal), handle, + ldHelper.status, hlslOP, RK, DL); + } else { + Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); + dxilutil::MigrateDebugValue(CI, Ld); + } + // CI is replaced by above translation calls.. return nullptr; } @@ -4345,19 +4442,20 @@ void Split64bitValForStore(Type *EltTy, ArrayRef vals, unsigned size, } void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, - Value *offset, IRBuilder<> &Builder, hlsl::OP *OP, - Value *sampIdx = nullptr) { + Value *Idx, Value *offset, IRBuilder<> &Builder, + hlsl::OP *OP, Value *sampIdx = nullptr) { Type *Ty = val->getType(); - - // This function is no longer used for lowering stores to a - // structured buffer. - DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer); - OP::OpCode opcode = OP::OpCode::NumOpCodes; + bool IsTyped = true; switch (RK) { case DxilResource::Kind::RawBuffer: case DxilResource::Kind::StructuredBuffer: + IsTyped = false; opcode = OP::OpCode::RawBufferStore; + // Where shader model and type allows, use vector store intrinsic. 
+ if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + Ty->isVectorTy() && Ty->getVectorNumElements() > 1) + opcode = OP::OpCode::RawBufferVectorStore; break; case DxilResource::Kind::TypedBuffer: opcode = OP::OpCode::BufferStore; @@ -4374,10 +4472,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, break; } - bool isTyped = opcode == OP::OpCode::TextureStore || - opcode == OP::OpCode::TextureStoreSample || - RK == DxilResource::Kind::TypedBuffer; - Type *i32Ty = Builder.getInt32Ty(); Type *i64Ty = Builder.getInt64Ty(); Type *doubleTy = Builder.getDoubleTy(); @@ -4400,11 +4494,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, alignValue = 4; Constant *Alignment = OP->GetI32Const(alignValue); bool is64 = EltTy == i64Ty || EltTy == doubleTy; - if (is64 && isTyped) { + if (is64 && IsTyped) { EltTy = i32Ty; } - Function *F = OP->GetOpFunc(opcode, EltTy); llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); llvm::Value *undefI = @@ -4416,44 +4509,58 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, storeArgs.emplace_back(opArg); // opcode storeArgs.emplace_back(handle); // resource handle - unsigned offset0Idx = 0; - if (RK == DxilResource::Kind::RawBuffer || - RK == DxilResource::Kind::TypedBuffer) { - // Offset 0 - if (offset->getType()->isVectorTy()) { - Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0); - storeArgs.emplace_back(scalarOffset); // offset + unsigned OffsetIdx = 0; + if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::RawBufferVectorStore || + opcode == OP::OpCode::BufferStore) { + // Append Coord0 (Index) value. + if (Idx->getType()->isVectorTy()) { + Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0); + storeArgs.emplace_back(ScalarIdx); // Coord0 (Index). } else { - storeArgs.emplace_back(offset); // offset + storeArgs.emplace_back(Idx); // Coord0 (Index). 
} - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + // Store OffsetIdx representing the argument that may need to be incremented + // later to load additional chunks of data. + // Only structured buffers can use the offset parameter. + // Others must increment the index. + if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = storeArgs.size(); + else + OffsetIdx = storeArgs.size() - 1; - // Offset 1 - storeArgs.emplace_back(undefI); + // Coord1 (Offset). + storeArgs.emplace_back(offset); } else { // texture store unsigned coordSize = DxilResource::GetNumCoords(RK); // Set x first. - if (offset->getType()->isVectorTy()) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0)); + if (Idx->getType()->isVectorTy()) + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0)); else - storeArgs.emplace_back(offset); - - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + storeArgs.emplace_back(Idx); for (unsigned i = 1; i < 3; i++) { if (i < coordSize) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, i)); + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i)); else storeArgs.emplace_back(undefI); } // TODO: support mip for texture ST } + // RawBufferVectorStore only takes a single value and alignment arguments. + if (opcode == DXIL::OpCode::RawBufferVectorStore) { + storeArgs.emplace_back(val); + storeArgs.emplace_back(Alignment); + Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); + Builder.CreateCall(F, storeArgs); + return; + } + Function *F = OP->GetOpFunc(opcode, EltTy); + constexpr unsigned MaxStoreElemCount = 4; const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements() : 1; const unsigned StoreInstCount = @@ -4474,30 +4581,24 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } for (unsigned j = 0; j < storeArgsList.size(); j++) { - - // For second and subsequent store calls, increment the offset0 (i.e. 
store - // index) + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. if (j > 0) { - // Greater than four-components store is not allowed for - // TypedBuffer and Textures. So greater than four elements - // scenario should only get hit here for RawBuffer. - DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer); unsigned EltSize = OP->GetAllocSizeForType(EltTy); - unsigned newOffset = EltSize * MaxStoreElemCount * j; - Value *newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset); - newOffsetVal = - Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal); - storeArgsList[j][offset0Idx] = newOffsetVal; + unsigned NewCoord = EltSize * MaxStoreElemCount * j; + Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord); + NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal); + storeArgsList[j][OffsetIdx] = NewCoordVal; } - // values + // Set value parameters. uint8_t mask = 0; if (Ty->isVectorTy()) { unsigned vecSize = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount); Value *emptyVal = undefVal; - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; emptyVal = Builder.CreateExtractElement(val, (uint64_t)0); } @@ -4513,7 +4614,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } else { - if (isTyped) { + if (IsTyped) { mask = DXIL::kCompMask_All; storeArgsList[j].emplace_back(val); storeArgsList[j].emplace_back(val); @@ -4528,7 +4629,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } } - if (is64 && isTyped) { + if (is64 && IsTyped) { unsigned size = 1; if (Ty->isVectorTy()) { size = @@ -4586,7 +4687,8 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); - TranslateStore(RK, handle, val, offset, Builder, 
hlslOP); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); + TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); return nullptr; } @@ -5680,7 +5782,24 @@ Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *refArgs[] = {nullptr, CI->getOperand(1)}; + // upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg + DXASSERT(CI->getNumArgOperands() == 3, + "hlopcode for allocaterayquery always expects 3 arguments"); + + llvm::Value *Arg = + CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx); + llvm::ConstantInt *ConstVal = llvm::dyn_cast(Arg); + DXASSERT(ConstVal, + "2nd argument to allocaterayquery must always be a constant value"); + if (ConstVal->getValue().getZExtValue() != 0) { + Value *refArgs[3] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx), + CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)}; + opcode = OP::OpCode::AllocateRayQuery2; + return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); + } + Value *refArgs[2] = { + nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)}; return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); } @@ -5689,7 +5808,6 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; - Value *opArg = hlslOP->GetU32Const(static_cast(opcode)); Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; @@ -6064,6 +6182,190 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // namespace +// Shader Execution Reordering. 
+namespace { +Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + hlsl::OP *HlslOP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + if (Opcode == OP::OpCode::HitObject_MakeNop) { + Value *HitObject = TrivialDxilOperation( + Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); + Builder.CreateStore(HitObject, HitObjectPtr); + DXASSERT( + CI->use_empty(), + "Default ctor return type is a Clang artifact. Value must not be used"); + return nullptr; + } + + DXASSERT_NOMSG(CI->getNumArgOperands() == + HLOperandIndex::kHitObjectMakeMiss_NumOp); + Value *RayFlags = CI->getArgOperand(SrcIdx++); + Value *MissShaderIdx = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMissRayDescOpIdx); + Value *RayDescOrigin = CI->getArgOperand(SrcIdx++); + Value *RayDescOriginX = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)0); + Value *RayDescOriginY = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)1); + Value *RayDescOriginZ = + Builder.CreateExtractElement(RayDescOrigin, (uint64_t)2); + + Value *RayDescTMin = CI->getArgOperand(SrcIdx++); + Value *RayDescDirection = CI->getArgOperand(SrcIdx++); + Value *RayDescDirectionX = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)0); + Value *RayDescDirectionY = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)1); + Value *RayDescDirectionZ = + Builder.CreateExtractElement(RayDescDirection, (uint64_t)2); + + Value *RayDescTMax = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + + Value *OutHitObject = TrivialDxilOperation( + Opcode, + {nullptr, RayFlags, MissShaderIdx, RayDescOriginX, RayDescOriginY, + RayDescOriginZ, RayDescTMin, RayDescDirectionX, RayDescDirectionY, + RayDescDirectionZ, RayDescTMax}, + Helper.voidTy, CI, 
HlslOP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; +} + +Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &Helper.hlslOP; + + // clang-format off + // Match MaybeReorderThread overload variants: + // void MaybeReorderThread(, + // HitObject Hit); + // void MaybeReorderThread(, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB ); + // void MaybeReorderThread(, + // HitObject Hit, + // uint CoherenceHint, + // uint NumCoherenceHintBitsFromLSB); + // clang-format on + const unsigned NumHLArgs = CI->getNumArgOperands(); + DXASSERT_NOMSG(NumHLArgs >= 2); + + // Use a NOP HitObject for MaybeReorderThread without HitObject. + Value *HitObject = nullptr; + unsigned HLIndex = 1; + if (3 == NumHLArgs) { + HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr}, + Type::getVoidTy(CI->getContext()), CI, OP); + } else { + Value *FirstParam = CI->getArgOperand(HLIndex); + DXASSERT_NOMSG(isa(FirstParam->getType())); + IRBuilder<> Builder(CI); + HitObject = Builder.CreateLoad(FirstParam); + HLIndex++; + } + + // If there are trailing parameters, these have to be the two coherence bit + // parameters + Value *CoherenceHint = nullptr; + Value *NumCoherenceHintBits = nullptr; + if (2 != NumHLArgs) { + DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs); + CoherenceHint = CI->getArgOperand(HLIndex++); + NumCoherenceHintBits = CI->getArgOperand(HLIndex++); + DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType()); + DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType()); + } else { + CoherenceHint = UndefValue::get(Helper.i32Ty); + NumCoherenceHintBits = OP->GetU32Const(0); + } + + TrivialDxilOperation( + OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits}, + Type::getVoidTy(CI->getContext()), CI, OP); + return nullptr; +} + +Value 
*TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return nullptr; // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectLoadLocalRootTableConstant( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, 
HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +Value *TranslateHitObjectSetShaderTableIndex( + CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches +} + +} // namespace + // Resource Handle. namespace { Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP, @@ -6091,20 +6393,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6112,20 +6402,8 @@ Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = 
Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateOr(x, y); } Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6135,21 +6413,8 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltCond = Builder.CreateExtractElement(cond, i); - Value *EltTrue = Builder.CreateExtractElement(t, i); - Value *EltFalse = Builder.CreateExtractElement(f, i); - Value *tmp = Builder.CreateSelect(EltCond, EltTrue, EltFalse); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateSelect(cond, t, f); } } // namespace @@ -6166,7 +6431,6 @@ Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, } // SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, HLOperationLowerHelper &helper, @@ -6176,7 +6440,6 @@ Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic."); return nullptr; } -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, @@ -6410,18 +6673,20 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, - {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, + {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, 
DXIL::OpCode::Countbits}, {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseX}, - {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseX}, + {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineX}, - {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy, TrivialUnaryOperationRet, + DXIL::OpCode::DerivCoarseY}, + {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperationRet, DXIL::OpCode::DerivCoarseY}, - {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, + {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperationRet, DXIL::OpCode::DerivFineY}, {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, @@ -6521,7 +6786,6 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, -#ifdef ENABLE_SPIRV_CODEGEN {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic, @@ -6532,7 +6796,6 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream}, {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream}, {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, 
@@ -6760,11 +7023,9 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, DXIL::OpCode::OutputComplete}, -// SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN + // SPIRV change starts {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends // Manually added part. @@ -6802,6 +7063,73 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, + DXIL::OpCode::HitObject_MakeNop}, + {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, + DXIL::OpCode::MaybeReorderThread}, + {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, + DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery, + DXIL::OpCode::HitObject_FromRayQuery}, + {IntrinsicOp::MOP_DxHitObject_GetAttributes, + TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes}, + {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex}, + {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_HitKind}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_InstanceID}, + {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection, + TranslateHitObjectVectorGetter, + DXIL::OpCode::HitObject_ObjectRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin, + 
TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, + {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex}, + {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayFlags}, + {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent}, + {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_RayTMin}, + {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex, + TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection}, + {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin, + TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3, + TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, + {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke, + DXIL::OpCode::HitObject_Invoke}, + {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsHit}, + {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsMiss}, + {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter, + DXIL::OpCode::HitObject_IsNop}, + {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, + TranslateHitObjectLoadLocalRootTableConstant, + 
DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, + {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMake, + DXIL::OpCode::HitObject_MakeMiss}, + {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, + TranslateHitObjectSetShaderTableIndex, + DXIL::OpCode::HitObject_SetShaderTableIndex}, + {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, + DXIL::OpCode::HitObject_TraceRay}, }; } // namespace static_assert( @@ -7887,113 +8215,36 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset, Builder.CreateCall(dxilF, Args); } -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned ElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy) { - - unsigned EltSize = DL.getTypeAllocSize(VecEltTy); - unsigned alignment = std::min(baseAlign, EltSize); - Constant *alignmentVal = OP->GetI32Const(alignment); - - if (baseOffset == nullptr) { - baseOffset = OP->GetU32Const(0); - } - - std::vector elts(ElemCount); - unsigned rest = (ElemCount % 4); - for (unsigned i = 0; i < ElemCount - rest; i += 4) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, 4, alignmentVal); - bufLds.emplace_back(bufLd); - elts[i] = ResultElts[0]; - elts[i + 1] = ResultElts[1]; - elts[i + 2] = ResultElts[2]; - elts[i + 3] = ResultElts[3]; - - baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize)); - } - - if (rest) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, rest, alignmentVal); - bufLds.emplace_back(bufLd); - for (unsigned i = 0; i < rest; i++) - elts[ElemCount - rest + i] = ResultElts[i]; - } - - // If the expected return type is scalar then skip building a vector - if (isScalarTy) { - return elts[0]; - } - - Value *Vec = 
HLMatrixLower::BuildVector(VecEltTy, elts, Builder); - return Vec; -} - -Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder, - Value *handle, hlsl::OP *OP, Value *status, - Value *bufIdx, Value *baseOffset, +Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, + Value *handle, HLResource::Kind RK, hlsl::OP *OP, + Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { + + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); +#ifndef NDEBUG + Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); + Type *matType = ptr->getType()->getPointerElementType(); HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - unsigned matSize = MatTy.getNumElements(); - std::vector bufLds; - Value *Vec = - TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx, - baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8); - Vec = MatTy.emitLoweredMemToReg(Vec, Builder); - return Vec; + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == + helper.retVal->getType(), + "helper type should match vectorized matrix"); +#endif + return TranslateBufLoad(helper, RK, Builder, OP, DL); } void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, hlsl::OP *OP, Value *bufIdx, Value *baseOffset, Value *val, const DataLayout &DL) { - HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - - val = MatTy.emitLoweredRegToMem(val, Builder); - - unsigned EltSize = DL.getTypeAllocSize(EltTy); - Constant *Alignment = OP->GetI32Const(EltSize); - Value *offset = baseOffset; - if (baseOffset == nullptr) - offset = OP->GetU32Const(0); - - unsigned matSize = MatTy.getNumElements(); - Value *undefElt = UndefValue::get(EltTy); - - unsigned storeSize = matSize; - if (matSize % 4) { - storeSize = matSize + 4 - (matSize & 3); - } - std::vector elts(storeSize, undefElt); - for (unsigned i = 0; i < matSize; i++) - elts[i] = 
Builder.CreateExtractElement(val, i); - - for (unsigned i = 0; i < matSize; i += 4) { - uint8_t mask = 0; - for (unsigned j = 0; j < 4 && (i + j) < matSize; j++) { - if (elts[i + j] != undefElt) - mask |= (1 << j); - } - GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder, - {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask, - Alignment); - // Update offset by 4*4bytes. - offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize)); - } + [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType); + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(), + "helper type should match vectorized matrix"); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); } -void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, - Value *status, Value *bufIdx, Value *baseOffset, - const DataLayout &DL) { +void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, + hlsl::OP *OP, Value *status, Value *bufIdx, + Value *baseOffset, const DataLayout &DL) { IRBuilder<> Builder(CI); HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); unsigned opcode = GetHLOpcode(CI); @@ -8006,13 +8257,10 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, // orientation. 
switch (matOp) { case HLMatLoadStoreOpcode::RowMatLoad: - case HLMatLoadStoreOpcode::ColMatLoad: { - Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); - Value *NewLd = TranslateStructBufMatLd( - ptr->getType()->getPointerElementType(), Builder, handle, OP, status, - bufIdx, baseOffset, DL); - CI->replaceAllUsesWith(NewLd); - } break; + case HLMatLoadStoreOpcode::ColMatLoad: + TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx, + baseOffset, DL); + break; case HLMatLoadStoreOpcode::RowMatStore: case HLMatLoadStoreOpcode::ColMatStore: { Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); @@ -8136,6 +8384,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, GEP->eraseFromParent(); } else if (StoreInst *stUser = dyn_cast(subsUser)) { + // Store elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. IRBuilder<> stBuilder(stUser); Value *Val = stUser->getValueOperand(); if (Val->getType()->isVectorTy()) { @@ -8159,6 +8410,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, LoadInst *ldUser = cast(subsUser); IRBuilder<> ldBuilder(ldUser); Value *ldData = UndefValue::get(resultType); + // Load elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. 
if (resultType->isVectorTy()) { for (unsigned i = 0; i < resultSize; i++) { Value *ResultElt; @@ -8283,57 +8537,26 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } userCall->eraseFromParent(); } else if (group == HLOpcodeGroup::HLMatLoadStore) - TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx, + // Load/Store matrix within a struct + TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx, baseOffset, DL); else if (group == HLOpcodeGroup::HLSubscript) { + // Subscript of matrix within a struct TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx, baseOffset, status, OP, DL); } - } else if (isa(user) || isa(user)) { - LoadInst *LdInst = dyn_cast(user); - StoreInst *StInst = dyn_cast(user); - - Type *Ty = isa(user) ? LdInst->getType() - : StInst->getValueOperand()->getType(); - Type *pOverloadTy = Ty->getScalarType(); - Value *Offset = baseOffset; - - if (LdInst) { - unsigned NumComponents = 0; - if (VectorType *VTy = dyn_cast(Ty)) - NumComponents = VTy->getNumElements(); - else - NumComponents = 1; - Value *ResultElts[4]; - Constant *Alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateRawBufLd(handle, bufIdx, Offset, status, pOverloadTy, ResultElts, - OP, Builder, NumComponents, Alignment); - Value *NewLd = ScalarizeElements(Ty, ResultElts, Builder); - LdInst->replaceAllUsesWith(NewLd); - } else { - Value *val = StInst->getValueOperand(); - Value *undefVal = llvm::UndefValue::get(pOverloadTy); - Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; - uint8_t mask = 0; - if (Ty->isVectorTy()) { - unsigned vectorNumElements = Ty->getVectorNumElements(); - DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); - assert(vectorNumElements <= 4); - for (unsigned i = 0; i < vectorNumElements; i++) { - vals[i] = Builder.CreateExtractElement(val, i); - mask |= (1 << i); - } - } else { - vals[0] = val; - mask = DXIL::kCompMask_X; - } - Constant *alignment = - 
OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateStructBufSt(handle, bufIdx, Offset, pOverloadTy, OP, Builder, - vals, mask, alignment); - } - user->eraseFromParent(); + } else if (LoadInst *LdInst = dyn_cast(user)) { + // Load of scalar/vector within a struct or structured raw load. + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + TranslateBufLoad(helper, ResKind, Builder, OP, DL); + + LdInst->eraseFromParent(); + } else if (StoreInst *StInst = dyn_cast(user)) { + // Store of scalar/vector within a struct or structured raw store. + Value *val = StInst->getValueOperand(); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); + StInst->eraseFromParent(); } else if (BitCastInst *BCI = dyn_cast(user)) { // Recurse users for (auto U = BCI->user_begin(); U != BCI->user_end();) { @@ -8368,13 +8591,18 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()), "else bitness is wrong"); - offset = Builder.CreateAdd(offset, baseOffset); + // No offset into element for Raw buffers; byte offset is in bufIdx. 
+ if (DXIL::IsRawBuffer(ResKind)) + bufIdx = Builder.CreateAdd(offset, bufIdx); + else + baseOffset = Builder.CreateAdd(offset, baseOffset); for (auto U = GEP->user_begin(); U != GEP->user_end();) { Value *GEPUser = *(U++); TranslateStructBufSubscriptUser(cast(GEPUser), handle, - ResKind, bufIdx, offset, status, OP, DL); + ResKind, bufIdx, baseOffset, status, OP, + DL); } // delete the inst GEP->eraseFromParent(); @@ -8388,13 +8616,12 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); Value *bufIdx = nullptr; Value *offset = nullptr; - if (ResKind == HLResource::Kind::RawBuffer) { - offset = subscriptIndex; - } else { + bufIdx = subscriptIndex; + if (ResKind == HLResource::Kind::RawBuffer) + offset = UndefValue::get(Type::getInt32Ty(CI->getContext())); + else // StructuredBuffer, TypedBuffer, etc. - bufIdx = subscriptIndex; offset = OP->GetU32Const(0); - } for (auto U = CI->user_begin(); U != CI->user_end();) { Value *user = *(U++); @@ -8408,19 +8635,14 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, // HLSubscript. namespace { -Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK, - DXIL::ResourceClass RC, Value *handle, - LoadInst *ldInst, IRBuilder<> &Builder, - hlsl::OP *hlslOP, const DataLayout &DL) { - ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, - /*bForSubscript*/ true); - // Default sampleIdx for 2DMS textures. 
- if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) - ldHelper.mipLevel = hlslOP->GetU32Const(0); - // use ldInst as retVal - ldHelper.retVal = ldInst; - TranslateLoad(ldHelper, RK, Builder, hlslOP, DL); +Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK, + DXIL::ResourceClass RC, Value *handle, + LoadInst *ldInst, IRBuilder<> &Builder, + hlsl::OP *hlslOP, const DataLayout &DL) { + // The arguments to the call instruction are used to determine the access, + // the return value and type come from the load instruction. + ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); // delete the ld ldInst->eraseFromParent(); return ldHelper.retVal; @@ -8463,9 +8685,9 @@ Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx, return VecVal; } -void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); hlsl::OP *hlslOP = &helper.hlslOP; @@ -8480,14 +8702,15 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, User *user = *(It++); Instruction *I = cast(user); IRBuilder<> Builder(I); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); if (LoadInst *ldInst = dyn_cast(user)) { - TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, - helper.dataLayout); + TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, + helper.dataLayout); } else if (StoreInst *stInst = dyn_cast(user)) { Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP); + 
CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP); // delete the st stInst->eraseFromParent(); } else if (GetElementPtrInst *GEP = dyn_cast(user)) { @@ -8504,7 +8727,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate Ld. LoadInst *tmpLd = StBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout); // Update vector. ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx, @@ -8512,9 +8735,10 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate St. // Reset insert point, UpdateVectorElt may move SI to different block. StBuilder.SetInsertPoint(SI); - TranslateStore(RK, handle, ldVal, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - StBuilder, hlslOP); + TranslateStore( + RK, handle, ldVal, + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI, + StBuilder, hlslOP); SI->eraseFromParent(); continue; } @@ -8524,7 +8748,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate tmp vector load with vector type & translate it LoadInst *tmpLd = LdBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout); // get the single element @@ -8697,15 +8921,17 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, DXASSERT(CI->hasOneUse(), "subscript should only have one use"); IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { - ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel); - TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); + Value *Offset = UndefValue::get(Builder.getInt32Ty()); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); 
ldInst->eraseFromParent(); } else { StoreInst *stInst = cast(*U); Value *val = stInst->getValueOperand(); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP, mipLevel); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP, mipLevel); stInst->eraseFromParent(); } Translated = true; @@ -8736,7 +8962,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, helper.dataLayout); else - TranslateDefaultSubscript(CI, helper, pObjHelper, Translated); + TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated); return; } diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp index feeb23a5da..d2438c7e22 100644 --- a/lib/Transforms/Scalar/LowerTypePasses.cpp +++ b/lib/Transforms/Scalar/LowerTypePasses.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLModule.h" @@ -180,10 +181,12 @@ bool LowerTypePass::runOnModule(Module &M) { namespace { class DynamicIndexingVectorToArray : public LowerTypePass { bool ReplaceAllVectors; + bool SupportsVectors; public: explicit DynamicIndexingVectorToArray(bool ReplaceAll = false) - : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {} + : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll), + SupportsVectors(false) {} static char ID; // Pass identification, replacement for typeid void applyOptions(PassOptions O) override; void dumpConfig(raw_ostream &OS) override; @@ -194,6 +197,7 @@ class DynamicIndexingVectorToArray : public LowerTypePass { Type *lowerType(Type *Ty) override; Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override; StringRef 
getGlobalPrefix() override { return ".v"; } + void initialize(Module &M) override; private: bool HasVectorDynamicIndexing(Value *V); @@ -207,6 +211,18 @@ class DynamicIndexingVectorToArray : public LowerTypePass { void ReplaceAddrSpaceCast(ConstantExpr *CE, Value *A, IRBuilder<> &Builder); }; +void DynamicIndexingVectorToArray::initialize(Module &M) { + // Set vector support according to available Dxil version. + // Use HLModule or metadata for version info. + // Otherwise retrieve from dxil module or metadata. + unsigned Major = 0, Minor = 0; + if (M.HasHLModule()) + M.GetHLModule().GetShaderModel()->GetDxilVersion(Major, Minor); + else + dxilutil::LoadDxilVersion(&M, Major, Minor); + SupportsVectors = (Major == 1 && Minor >= 9); +} + void DynamicIndexingVectorToArray::applyOptions(PassOptions O) { GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors, ReplaceAllVectors); @@ -306,9 +322,21 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) { } bool DynamicIndexingVectorToArray::needToLower(Value *V) { + bool MustReplaceVector = ReplaceAllVectors; Type *Ty = V->getType()->getPointerElementType(); - if (dyn_cast(Ty)) { - if (isa(V) || ReplaceAllVectors) { + + if (ArrayType *AT = dyn_cast(Ty)) { + // Array must be replaced even without dynamic indexing to remove vector + // type in dxil. + MustReplaceVector = true; + Ty = dxilutil::GetArrayEltTy(AT); + } + + if (isa(Ty)) { + // Only needed for 2+ vectors where native vectors unsupported. + if (SupportsVectors && Ty->getVectorNumElements() > 1) + return false; + if (isa(V) || MustReplaceVector) { return true; } // Don't lower local vector which only static indexing. @@ -319,12 +347,6 @@ bool DynamicIndexingVectorToArray::needToLower(Value *V) { ReplaceStaticIndexingOnVector(V); return false; } - } else if (ArrayType *AT = dyn_cast(Ty)) { - // Array must be replaced even without dynamic indexing to remove vector - // type in dxil. - // TODO: optimize static array index in later pass. 
- Type *EltTy = dxilutil::GetArrayEltTy(AT); - return isa(EltTy); } return false; } diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 0c3e13f608..e487079b94 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -81,16 +81,18 @@ class SROA_Helper { static bool DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT); - static bool - DoScalarReplacement(GlobalVariable *GV, std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, bool hasPrecise, - DxilTypeSystem &typeSys, const DataLayout &DL, - SmallVector &DeadInsts, DominatorTree *DT); + static bool DoScalarReplacement(GlobalVariable *GV, + std::vector &Elts, + IRBuilder<> &Builder, bool bFlatVector, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, + DominatorTree *DT); static unsigned GetEltAlign(unsigned ValueAlign, const DataLayout &DL, Type *EltTy, unsigned Offset); // Lower memcpy related to V. 
@@ -1714,6 +1716,7 @@ bool isGroupShareOrConstStaticArray(GlobalVariable *GV) { bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { Module &M = *HLM.GetModule(); + bool SupportsVectors = HLM.GetShaderModel()->IsSM69Plus(); DxilTypeSystem &typeSys = HLM.GetTypeSystem(); const DataLayout &DL = M.getDataLayout(); @@ -1878,7 +1881,8 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { uint64_t NumInstances = 1; bool SROAed = SROA_Helper::DoScalarReplacement( AI, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ true, hasPrecise, typeSys, DL, DeadInsts, &DT); + /*bFlatVector*/ true, SupportsVectors, hasPrecise, typeSys, DL, + DeadInsts, &DT); if (SROAed) { Type *Ty = AI->getAllocatedType(); @@ -1945,7 +1949,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { continue; } - // Flat Global vector if no dynamic vector indexing. + // Flatten global vector if it has no dynamic vector indexing. bool bFlatVector = !hasDynamicVectorIndexing(GV); if (bFlatVector) { @@ -1981,7 +1985,7 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { // SROA_Parameter_HLSL has no access to a domtree, if one is needed, // it'll be generated SROAed = SROA_Helper::DoScalarReplacement( - GV, Elts, Builder, bFlatVector, + GV, Elts, Builder, bFlatVector, SupportsVectors, // TODO: set precise. 
/*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr); } @@ -2771,6 +2775,14 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, /*bIn*/ true, /*bOut*/ true); } break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: { + if (OldVal == + CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + } + } break; case IntrinsicOp::MOP_TraceRayInline: { if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { @@ -2920,7 +2932,8 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy, bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, Type *&BrokenUpTy, uint64_t &NumInstances, IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, + bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, SmallVector &DeadInsts, DominatorTree *DT) { @@ -3033,6 +3046,10 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector &Elts, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. + if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -3114,13 +3131,11 @@ unsigned SROA_Helper::GetEltAlign(unsigned ValueAlign, const DataLayout &DL, /// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new /// AllocaInsts into Elts. Then do SROA on V. 
-bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, - std::vector &Elts, - IRBuilder<> &Builder, bool bFlatVector, - bool hasPrecise, DxilTypeSystem &typeSys, - const DataLayout &DL, - SmallVector &DeadInsts, - DominatorTree *DT) { +bool SROA_Helper::DoScalarReplacement( + GlobalVariable *GV, std::vector &Elts, IRBuilder<> &Builder, + bool bFlatVector, bool SupportsVectors, bool hasPrecise, + DxilTypeSystem &typeSys, const DataLayout &DL, + SmallVector &DeadInsts, DominatorTree *DT) { DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n'); Type *Ty = GV->getType(); // Skip none pointer types. @@ -3134,6 +3149,9 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, // Skip basic types. if (Ty->isSingleValueType() && !Ty->isVectorTy()) return false; + // Skip vector where supported if it has more than 1 element. + if (Ty->isVectorTy() && SupportsVectors && Ty->getVectorNumElements() > 1) + return false; // Skip matrix types. if (HLMatrixType::isa(Ty)) return false; @@ -3240,6 +3258,10 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, if (!bFlatVector) return false; + // Skip vector where supported if it has more than 1 element. 
+ if (SupportsVectors && ElTy->getVectorNumElements() > 1) + return false; + // for array of vector // split into arrays of scalar VectorType *ElVT = cast(ElTy); @@ -5277,6 +5299,8 @@ void SROA_Parameter_HLSL::flattenArgument( std::vector &FlatAnnotationList, BasicBlock *EntryBlock, ArrayRef DDIs) { std::deque WorkList; + bool SupportsVectors = m_pHLModule->GetShaderModel()->IsSM69Plus(); + WorkList.push_back({Arg, paramAnnotation}); unsigned startArgIndex = FlatAnnotationList.size(); @@ -5351,8 +5375,8 @@ void SROA_Parameter_HLSL::flattenArgument( // DomTree isn't used by arguments SROAed = SROA_Helper::DoScalarReplacement( V, Elts, BrokenUpTy, NumInstances, Builder, - /*bFlatVector*/ false, annotation.IsPrecise(), dxilTypeSys, DL, - DeadInsts, /*DT*/ nullptr); + /*bFlatVector*/ false, SupportsVectors, annotation.IsPrecise(), + dxilTypeSys, DL, DeadInsts, /*DT*/ nullptr); } if (SROAed) { diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 729771c7c7..730354af99 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,6 +14,9 @@ // //===----------------------------------------------------------------------===// +#include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilUtil.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -151,6 +154,7 @@ class Scalarizer : public FunctionPass, // HLSL Change Begin bool AllowFolding = false; + bool SupportsVectors = false; Scalarizer(bool AllowFolding) : FunctionPass(ID), AllowFolding(AllowFolding) { @@ -290,6 +294,13 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { + // HLSL Change start - set SupportsVectors + const Module *M = F.getParent(); + unsigned Major = 0, Minor = 0; + if (hlsl::dxilutil::LoadDxilVersion(M, Major, Minor)) + SupportsVectors = (Major == 1 && Minor >= 9); + // HLSL Change end - set SupportsVectors + for (Function::iterator 
BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { @@ -436,7 +447,8 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, template bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { VectorType *VT = dyn_cast(I.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -457,7 +469,8 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { bool Scalarizer::visitSelectInst(SelectInst &SI) { VectorType *VT = dyn_cast(SI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -500,7 +513,8 @@ bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { VectorType *VT = dyn_cast(GEPI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; IRBuilder<> Builder(GEPI.getParent(), &GEPI); @@ -534,7 +548,8 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { bool Scalarizer::visitCastInst(CastInst &CI) { VectorType *VT = dyn_cast(CI.getDestTy()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -559,6 +574,12 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { unsigned DstNumElems = DstVT->getNumElements(); unsigned SrcNumElems = SrcVT->getNumElements(); + + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && (DstNumElems > 1 || SrcNumElems > 1)) + return false; + // HLSL Change End - allow > 1 vectors where supported. 
+ IRBuilder<> Builder(BCI.getParent(), &BCI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); @@ -609,7 +630,8 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { VectorType *VT = dyn_cast(SVI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -643,7 +665,8 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { bool Scalarizer::visitPHINode(PHINode &PHI) { VectorType *VT = dyn_cast(PHI.getType()); - if (!VT) + // HLSL Change - allow > 1 vectors where supported. + if (!VT || (SupportsVectors && VT->getNumElements() > 1)) return false; unsigned NumElems = VT->getNumElements(); @@ -679,6 +702,10 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. IRBuilder<> Builder(LI.getParent(), &LI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); @@ -705,6 +732,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); + // HLSL Change Begin - allow > 1 vectors where supported. + if (SupportsVectors && NumElems > 1) + return false; + // HLSL Change End - allow > 1 vectors where supported. 
IRBuilder<> Builder(SI.getParent(), &SI); Builder.AllowFolding = this->AllowFolding; // HLSL Change Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index f0d2dbcd7a..46294b3db8 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -29,7 +28,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include @@ -473,6 +474,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, CloningDirector *Director) { + TimeTraceScope TimeScope("CloneAndPruneIntoFromInst", [&] { + return (Twine(OldFunc->getName()) + "->" + NewFunc->getName()).str(); + }); assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index f6a255a0e4..bfa4b61fbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,10 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include 
"llvm/Analysis/AssumptionCache.h" @@ -24,13 +23,13 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -38,8 +37,10 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -291,6 +292,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { + TimeTraceScope TimeScope("CloneAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); const Function *CalledFunc = CS.getCalledFunction(); SetVector MD; @@ -401,6 +404,8 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, const DataLayout &DL, AliasAnalysis *AA) { + TimeTraceScope TimeScope("AddAliasScopeMetadata", + [&] { return CS.getCalledFunction()->getName(); }); if (!EnableNoAliasConversion) return; @@ -872,6 +877,7 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, /// to encode location where these instructions are inlined. 
static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { + TimeTraceScope TimeScope("fixupLineNumbers", [&] { return Fn->getName(); }); DebugLoc TheCallDL = TheCall->getDebugLoc(); #if 0 // HLSL Change if (!TheCallDL) diff --git a/tools/clang/CMakeLists.txt b/tools/clang/CMakeLists.txt index 71190336ca..449e6c28b4 100644 --- a/tools/clang/CMakeLists.txt +++ b/tools/clang/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.8) +cmake_minimum_required(VERSION 3.17.2) # HLSL Change - Require CMake 3.17.2. # FIXME: It may be removed when we use 2.8.12. if(CMAKE_VERSION VERSION_LESS 2.8.12) diff --git a/tools/clang/include/clang/AST/DeclCXX.h b/tools/clang/include/clang/AST/DeclCXX.h index 3b07576545..36e0f99c82 100644 --- a/tools/clang/include/clang/AST/DeclCXX.h +++ b/tools/clang/include/clang/AST/DeclCXX.h @@ -465,6 +465,10 @@ class CXXRecordDecl : public RecordDecl { /// \brief Whether we are currently parsing base specifiers. bool IsParsingBaseSpecifiers : 1; + /// \brief Whether this class contains at least one member or base + /// class containing an HLSL vector longer than 4 elements. + bool HasHLSLLongVector : 1; + /// \brief The number of base class specifiers in Bases. unsigned NumBases; @@ -1018,6 +1022,13 @@ class CXXRecordDecl : public RecordDecl { return data().NeedOverloadResolutionForDestructor; } + // HLSL Change add HLSL Long vector bit. + /// \brief Determine whether this class contains an HLSL long vector + /// of over 4 elements. + bool hasHLSLLongVector() { return data().HasHLSLLongVector; } + /// \brief Set that this class contains an HLSL long vector of over 4 elements + bool setHasHLSLLongVector() { return data().HasHLSLLongVector = true; } + /// \brief Determine whether this class describes a lambda function object. bool isLambda() const { // An update record can't turn a non-lambda into a lambda. 
diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 2aa9afa5f9..3a02824b3a 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // /// /// \file // /// \brief Defines the HLSL type system interface. // @@ -31,6 +34,7 @@ namespace clang { class ASTContext; class AttributeList; +class CXXConstructorDecl; class CXXMethodDecl; class CXXRecordDecl; class ClassTemplateDecl; @@ -348,9 +352,10 @@ void AddHLSLNodeOutputRecordTemplate( _Outptr_ clang::ClassTemplateDecl **outputRecordTemplateDecl, bool isCompleteType = true); -clang::CXXRecordDecl *DeclareRecordTypeWithHandle(clang::ASTContext &context, - llvm::StringRef name, - bool isCompleteType = true); +clang::CXXRecordDecl * +DeclareRecordTypeWithHandle(clang::ASTContext &context, llvm::StringRef name, + bool isCompleteType = true, + clang::InheritableAttr *Attr = nullptr); void AddRaytracingConstants(clang::ASTContext &context); void AddSamplerFeedbackConstants(clang::ASTContext &context); @@ -381,15 +386,16 @@ clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, - llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); + llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); -clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext 
&context, - bool bTBuf); + clang::InheritableAttr *Attr = nullptr); +clang::CXXRecordDecl * +DeclareConstantBufferViewType(clang::ASTContext &context, + clang::InheritableAttr *Attr); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); +clang::CXXRecordDecl *DeclareHitObjectType(clang::NamespaceDecl &NSDecl); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); @@ -400,6 +406,10 @@ DeclareNodeOrRecordType(clang::ASTContext &Ctx, DXIL::NodeIOKind Type, bool IsCompleteType = false); #ifdef ENABLE_SPIRV_CODEGEN +clang::CXXRecordDecl * +DeclareVkBufferPointerType(clang::ASTContext &context, + clang::DeclContext *declContext); + clang::CXXRecordDecl *DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, @@ -425,7 +435,7 @@ clang::VarDecl *DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, /// method. AST context in which to /// work. Class in which the function template /// is declared. Function for which a -/// template is created. Declarations for templates to the /// function. Count of /// template declarations. 
A new function template declaration @@ -460,6 +470,7 @@ bool IsHLSLUnsigned(clang::QualType type); bool IsHLSLMinPrecision(clang::QualType type); bool HasHLSLUNormSNorm(clang::QualType type, bool *pIsSNorm = nullptr); bool HasHLSLGloballyCoherent(clang::QualType type); +bool HasHLSLReorderCoherent(clang::QualType type); bool IsHLSLInputPatchType(clang::QualType type); bool IsHLSLOutputPatchType(clang::QualType type); bool IsHLSLPointStreamType(clang::QualType type); @@ -471,6 +482,7 @@ bool IsHLSLNodeInputType(clang::QualType type); bool IsHLSLDynamicResourceType(clang::QualType type); bool IsHLSLDynamicSamplerType(clang::QualType type); bool IsHLSLNodeType(clang::QualType type); +bool IsHLSLHitObjectType(clang::QualType type); bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type); bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type); @@ -530,6 +542,29 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, clang::QualType paramType); bool IsPatchConstantFunctionDecl(const clang::FunctionDecl *FD); +#ifdef ENABLE_SPIRV_CODEGEN +bool IsVKBufferPointerType(clang::QualType type); +clang::QualType GetVKBufferPointerBufferType(clang::QualType type); +unsigned GetVKBufferPointerAlignment(clang::QualType type); +#endif + +/// Adds a constructor declaration to the specified class +/// record. ASTContext that owns +/// declarations. Record declaration in which +/// to add constructor. Result type for +/// constructor. Types for constructor +/// parameters. Names for constructor +/// parameters. Name for +/// constructor. Whether the constructor is a +/// const function. The method declaration for the +/// constructor. 
+clang::CXXConstructorDecl *CreateConstructorDeclarationWithParams( + clang::ASTContext &context, clang::CXXRecordDecl *recordDecl, + clang::QualType resultType, llvm::ArrayRef paramTypes, + llvm::ArrayRef paramNames, + clang::DeclarationName declarationName, bool isConst, + bool isTemplateFunction = false); + /// Adds a function declaration to the specified class /// record. ASTContext that owns /// declarations. Record declaration in which @@ -544,6 +579,7 @@ clang::CXXMethodDecl *CreateObjectFunctionDeclarationWithParams( clang::QualType resultType, llvm::ArrayRef paramTypes, llvm::ArrayRef paramNames, clang::DeclarationName declarationName, bool isConst, + clang::StorageClass SC = clang::StorageClass::SC_None, bool isTemplateFunction = false); DXIL::ResourceClass GetResourceClassForType(const clang::ASTContext &context, diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 75e665a5e9..3909c8b5e8 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be @@ -321,6 +324,8 @@ enum CastKind { CK_HLSLCC_FloatingToIntegral, CK_HLSLCC_FloatingToBoolean, CK_HLSLCC_FloatingCast, + CK_VK_BufferPointerToIntegral, + CK_VK_IntegralToBufferPointer, // HLSL Change - Made CK_Invalid an enum case because otherwise it is UB to // assign it to a value of CastKind. 
diff --git a/tools/clang/include/clang/AST/Type.h b/tools/clang/include/clang/AST/Type.h index f393f88ce9..2c96bbc295 100644 --- a/tools/clang/include/clang/AST/Type.h +++ b/tools/clang/include/clang/AST/Type.h @@ -3652,7 +3652,8 @@ class AttributedType : public Type, public llvm::FoldingSetNode { attr_hlsl_row_major, attr_hlsl_column_major, attr_hlsl_globallycoherent, - // HLSL Change Ends + attr_hlsl_reordercoherent, + // HLSL Change Ends }; private: diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 3a6718a339..2518423565 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// class DocumentationCategory { @@ -851,6 +854,12 @@ def HLSLGloballyCoherent : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLReorderCoherent : InheritableAttr { + let Spellings = [CXX11<"", "reordercoherent", 2015>]; + let Subjects = SubjectList<[Var, Function]>; + let Documentation = [Undocumented]; +} + def HLSLShader : InheritableAttr { let Spellings = [CXX11<"", "shader", 2017>]; let Args = [StringArgument<"stage">]; // one of compute, pixel, vertex, hull, domain, geometry, node @@ -939,6 +948,52 @@ def HLSLCXXOverload : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLVector : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLMatrix : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLTessPatch : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Args = [BoolArgument<"IsInput">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLStreamOutput : InheritableAttr { + let Spellings = []; // No spellings! + // PrimVertices are the number of vertices that make up the streamed + // primitive. Points have 1. Lines have 2. Triangles have 3. + let Args = [UnsignedArgument<"PrimVertices">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLResource : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [UnsignedArgument<"ResKindUint">, + UnsignedArgument<"ResClassUint">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; + + // Add enum typed getters for safety and brevity. + let AdditionalMembers = [{ + hlsl::DXIL::ResourceKind getResKind() const { + return (hlsl::DXIL::ResourceKind)getResKindUint(); + } + hlsl::DXIL::ResourceClass getResClass() const { + return (hlsl::DXIL::ResourceClass)getResClassUint(); + } + }]; +} + def HLSLNodeLaunch : InheritableAttr { let Spellings = [CXX11<"", "nodelaunch", 2017>]; let Args = [StringArgument<"LaunchType">]; // one of broadcasting, coalescing, thread @@ -992,13 +1047,6 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { let Documentation = [Undocumented]; } -def HLSLResource : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - def HLSLNodeObject : InheritableAttr { let Spellings = []; // No spellings! let Subjects = SubjectList<[CXXRecord]>; @@ -1110,6 +1158,28 @@ def HLSLNodeObject : InheritableAttr { }]; } +// HLSL Ray Query Attribute + +def HLSLRayQueryObject : InheritableAttr { + let Spellings = []; // No spellings! 
+ let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLSubObject : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; + let Args = [UnsignedArgument<"SubObjKindUint">, UnsignedArgument<"HitGroupType">]; +} + +// HLSL HitObject Attribute + +def HLSLHitObject : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} // HLSL Parameter Attributes @@ -1386,6 +1456,20 @@ def VKStorageClassExt : InheritableAttr { let Documentation = [Undocumented]; } +def VKBufferPointer : InheritableAttr { + let Spellings = [CXX11<"", "hlsl_vk_buffer_pointer", 2021>]; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + +def VKAliasedPointer : InheritableAttr { + let Spellings = [CXX11<"vk", "aliased_pointer">]; + let Subjects = SubjectList<[Var, ParmVar], ErrorDiag>; + let Args = []; + let LangOpts = [SPIRV]; + let Documentation = [Undocumented]; +} + // Global variables that are of struct type def StructGlobalVar : SubsetSubjecthasGlobalStorage() && S->getType()->isStructureType()}]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticGroups.td b/tools/clang/include/clang/Basic/DiagnosticGroups.td index 39618aed04..ff21b34652 100644 --- a/tools/clang/include/clang/Basic/DiagnosticGroups.td +++ b/tools/clang/include/clang/Basic/DiagnosticGroups.td @@ -799,10 +799,12 @@ def HLSLPayloadAccessQualifer: DiagGroup<"payload-access-qualifier", [ HLSLPayloadAccessQualiferPerf, HLSLPayloadAccessQualiferCall ]>; +def HLSLRayQueryFlags : DiagGroup<"hlsl-rayquery-flags">; def HLSLSemanticIdentifierCollision : DiagGroup<"semantic-identifier-collision">; def HLSLStructurizeExitsLifetimeMarkersConflict: DiagGroup<"structurize-exits-lifetime-markers-conflict">; def HLSLParameterUsage : DiagGroup<"parameter-usage">; def HLSLAvailability: DiagGroup<"hlsl-availability">; +def 
HLSLAvailabilityConstant: DiagGroup<"hlsl-availability-constant">; def HLSLBarrier : DiagGroup<"hlsl-barrier">; def HLSLLegacyLiterals : DiagGroup<"hlsl-legacy-literal">; // HLSL Change Ends diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 99b6534e1f..6254e5fc71 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -7519,8 +7522,8 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_4: Error< - "invalid value, valid range is between 1 and 4 inclusive">; +def err_hlsl_invalid_range_1_to_max + : Error<"invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; def err_hlsl_matrix_member_empty: Error< @@ -7549,6 +7552,8 @@ def err_hlsl_vector_element_index_out_of_bounds: Error< "vector element index '%0' is out of bounds">; def err_hlsl_vector_member_too_many_positions: Error< "more than four positions are referenced in '%0'">; +def err_hlsl_vector_member_on_long_vector: Error< + "invalid swizzle '%0' on vector of over 4 elements.">; def err_hlsl_missing_type_specifier : Error< // Patterened after err_missing_type_specifier "HLSL requires a type specifier for all declarations">; def err_hlsl_multiple_concrete_bases : Error< @@ -7652,8 +7657,20 @@ def 
err_payload_fields_is_payload_and_overqualified : Error< "payload field '%0' is a payload struct. Payload access qualifiers are not allowed on payload types.">; def warn_hlsl_payload_qualifer_dropped : Warning< "payload access qualifiers ignored. These are only supported for lib_6_7+ targets and lib_6_6 with with the -enable-payload-qualifiers flag.">, InGroup; +def warn_hlsl_rayquery_flags_disallowed : Warning< + "A non-zero value for the RayQueryFlags template argument requires" + " shader model 6.9 or above.">, DefaultError, InGroup; +def warn_hlsl_rayquery_flags_conflict : Warning< + "When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags" + " must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.">, DefaultError, InGroup; def err_hlsl_unsupported_builtin_op: Error< "operator cannot be used with built-in type %0">; +def warn_hlsl_builtin_constant_unavailable: Warning< + "potential misuse of built-in constant %0 in shader model %1; introduced" + " in shader model %2">, InGroup; +def warn_hlsl_builtin_type_unavailable: Warning< + "potential misuse of built-in type %0 in shader model %1; introduced" + " in shader model %2">, DefaultError, InGroup; def err_hlsl_unsupported_char_literal : Error< "unsupported style of char literal - use a single-character char-based literal">; def err_hlsl_unsupported_clipplane_argument_expression : Error< @@ -7689,8 +7706,10 @@ def err_hlsl_varmodifierna : Error< "%0 is not a valid modifier for a %1">; def err_hlsl_varmodifierna_decltype : Error< "%0 is not a valid modifier for a declaration of type %1">; -def note_hlsl_globallycoherent_applies_to : Note< - "'globallycoherent' can only be applied to UAV or RWDispatchNodeInputRecord objects">; +def note_hlsl_coherence_applies_to : Note< + "'%select{reordercoherent|globallycoherent}0' can only be applied to UAV%select{| or RWDispatchNodeInputRecord}0 objects">; +def warn_hlsl_gc_implies_rc_attribute : Warning< + "attribute 'reordercoherent' implied by 'globallycoherent' in 
%0. 'reordercoherent' ignored.">; def err_hlsl_varmodifiersna : Error< "%0 and %1 cannot be used together for a %2">; def err_hlsl_vla : Error< // Patterened after err_opencl_vla @@ -7701,8 +7720,6 @@ def err_hlsl_control_flow_cond_not_scalar : Error< "%0 statement conditional expressions must evaluate to a scalar">; def err_hlsl_unsupportedvectortype : Error< "%0 is declared with type %1, but only primitive scalar values are supported">; -def err_hlsl_unsupportedvectorsize : Error< - "%0 is declared with size %1, but only values 1 through 4 are supported">; def err_hlsl_unsupportedmatrixsize : Error< "%0 is declared with size %1x%2, but only values 1 through 4 are supported">; def err_hlsl_norm_float_only : Error< @@ -7741,9 +7758,17 @@ def warn_hlsl_semantic_attribute_position_misuse_hint: Warning< def warn_hlsl_unary_negate_unsigned : Warning< "unary negate of unsigned value is still unsigned">, InGroup, DefaultWarn; -def warn_hlsl_impcast_glc_mismatch : Warning< - "implicit conversion from %0 to %1 %select{loses|adds}2 globallycoherent annotation">, - InGroup, DefaultWarn; +def warn_hlsl_impcast_coherence_mismatch : Warning< + "implicit conversion from %0 to %1 %select{" + "demotes globallycoherent to reordercoherent|" + "promotes reordercoherent to globallycoherent|" + "loses reordercoherent|" + "loses globallycoherent|" + "adds reordercoherent|" + "adds globallycoherent}2 annotation">, + InGroup; +def warn_hlsl_glc_implies_rdc : Warning< + "attribute 'globallycoherent' implies 'reordercoherent'">, InGroup; def warn_hlsl_narrowing : Warning< "conversion from larger type %0 to smaller type %1, possible loss of data">, InGroup, DefaultWarn; @@ -7826,7 +7851,7 @@ def warn_hlsl_intrinsic_in_wrong_shader_model : Warning< "intrinsic %0 potentially used by '%1' requires shader model %2 or greater">, DefaultError, InGroup; def warn_hlsl_intrinsic_overload_in_wrong_shader_model : Warning< - "overload of intrinsic %0 requires shader model %1 or greater">, + "overload of 
intrinsic %0 requires shader model %1 or greater">, DefaultError, InGroup; def err_hlsl_intrinsic_template_arg_unsupported: Error< "Explicit template arguments on intrinsic %0 are not supported">; @@ -7853,6 +7878,14 @@ def err_hlsl_load_from_mesh_out_arrays: Error< "output arrays of a mesh shader can not be read from">; def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; +def err_hlsl_unsupported_long_vector + : Error<"vectors of over 4 elements in " + "%select{ConstantBuffers or TextureBuffers|" + "tessellation patches|geometry streams|node records|" + "cbuffers or tbuffers|user-defined struct parameter|" + "entry function parameters|entry function return type|" + "patch constant function parameters|patch constant function return type|" + "payload parameters}0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< @@ -7953,7 +7986,7 @@ def err_hlsl_barrier_invalid_memory_flags: Error< "UAV_MEMORY, GROUP_SHARED_MEMORY, NODE_INPUT_MEMORY, NODE_OUTPUT_MEMORY flags">; def err_hlsl_barrier_invalid_semantic_flags: Error< "invalid SemanticFlags for Barrier operation; expected 0 or some combination of " - "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags">; + "GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE%select{|, REORDER_SCOPE}0 flags">; def warn_hlsl_barrier_group_memory_requires_group: Warning< "GROUP_SHARED_MEMORY specified for Barrier operation when context has no visible group">, InGroup, DefaultError; @@ -7974,10 +8007,20 @@ def warn_hlsl_legacy_integer_literal_signedness: Warning< InGroup, DefaultIgnore; def err_hlsl_unsupported_semantic_index: Error< "'%0' is defined with semantic index %1, but only values 0 through %2 are supported">; + +// Shader Execution Reordering +def err_hlsl_reorder_unsupported_stage : Error< + "dx::MaybeReorderThread is 
unavailable in shader stage '%0' (requires 'raygeneration')">; +def err_hlsl_hitobject_unsupported_stage : Error< + "dx::HitObject is unavailable in shader stage '%0' (requires 'raygeneration', 'closesthit' or 'miss')">; // HLSL Change Ends // SPIRV Change Starts def err_hlsl_vulkan_specific_feature: Error<"%0 is a Vulkan specific feature">; +def err_hlsl_vk_pointer_cast_alignment: Error< + "Vulkan buffer pointer cannot be cast to greater alignment">; +def err_hlsl_vk_static_pointer_cast_type: Error< + "vk::static_pointer_cast() content type must be base class of argument's content type">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/Basic/LangOptions.h b/tools/clang/include/clang/Basic/LangOptions.h index 8dc15da5d8..433b767c8d 100644 --- a/tools/clang/include/clang/Basic/LangOptions.h +++ b/tools/clang/include/clang/Basic/LangOptions.h @@ -15,7 +15,7 @@ #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H #define LLVM_CLANG_BASIC_LANGOPTIONS_H -#include "dxc/DXIL/DxilConstants.h" // For DXIL::DefaultLinkage +#include "dxc/DXIL/DxilConstants.h" // For DXIL:: default values. #include "dxc/Support/HLSLVersion.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/LLVM.h" @@ -168,6 +168,7 @@ class LangOptions : public LangOptionsBase { hlsl::DXIL::DefaultLinkage::Default; /// Whether use row major as default matrix major. 
bool HLSLDefaultRowMajor = false; + unsigned MaxHLSLVectorLength = hlsl::DXIL::kDefaultMaxVectorLength; // HLSL Change Ends bool SPIRV = false; // SPIRV Change diff --git a/tools/clang/include/clang/Basic/TokenKinds.def b/tools/clang/include/clang/Basic/TokenKinds.def index 2267b12b74..6933c965cf 100644 --- a/tools/clang/include/clang/Basic/TokenKinds.def +++ b/tools/clang/include/clang/Basic/TokenKinds.def @@ -508,6 +508,7 @@ KEYWORD(lineadj , KEYHLSL) KEYWORD(triangle , KEYHLSL) KEYWORD(triangleadj , KEYHLSL) KEYWORD(globallycoherent , KEYHLSL) +KEYWORD(reordercoherent , KEYHLSL) KEYWORD(interface , KEYHLSL) KEYWORD(sampler_state , KEYHLSL) KEYWORD(technique , KEYHLSL) diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 32ee187091..3c1871df37 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -59,10 +59,12 @@ enum class Extension { KHR_physical_storage_buffer, KHR_vulkan_memory_model, NV_compute_shader_derivatives, + KHR_compute_shader_derivatives, KHR_fragment_shader_barycentric, KHR_maximal_reconvergence, KHR_float_controls, NV_shader_subgroup_partitioned, + KHR_quad_control, Unknown, }; @@ -132,6 +134,9 @@ class FeatureManager { /// Returns false otherwise. bool isTargetEnvVulkan1p3OrAbove(); + /// Return true if the target environment is a Vulkan environment. + bool isTargetEnvVulkan(); + /// Returns the spv_target_env matching the input string if possible. /// This functions matches the spv_target_env with the command-line version /// of the name ('vulkan1.1', not 'Vulkan 1.1'). diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index f03735115b..5e03d1ef96 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. 
See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H @@ -239,7 +242,7 @@ class SpirvBuilder { /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V /// opcode. SpirvGroupNonUniformOp *createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation, llvm::Optional groupOp = llvm::None); @@ -273,6 +276,14 @@ class SpirvBuilder { SpirvInstruction *sample, SourceLocation); + /// \brief Creates an OpConverPtrToU SPIR-V instruction with the given + /// parameters. + SpirvConvertPtrToU *createConvertPtrToU(SpirvInstruction *ptr, QualType type); + + /// \brief Creates an OpConverUToPtr SPIR-V instruction with the given + /// parameters. + SpirvConvertUToPtr *createConvertUToPtr(SpirvInstruction *val, QualType type); + /// \brief Creates SPIR-V instructions for sampling the given image. /// /// If compareVal is given a non-zero value, *Dref* variants of OpImageSample* diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index e65097bedb..c18c139642 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H @@ -317,6 +320,13 @@ class SpirvContext { const HybridPointerType *getPointerType(QualType pointee, spv::StorageClass); + const ForwardPointerType *getForwardPointerType(QualType pointee); + + const SpirvPointerType *getForwardReference(QualType type); + + void registerForwardReference(QualType type, + const SpirvPointerType *pointerType); + /// Generates (or reuses an existing) OpString for the given string literal. SpirvString *getSpirvString(llvm::StringRef str); @@ -478,6 +488,8 @@ class SpirvContext { llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; llvm::SmallVector hybridPointerTypes; + llvm::MapVector forwardPointerTypes; + llvm::MapVector forwardReferences; llvm::DenseSet functionTypes; llvm::DenseMap spirvIntrinsicTypesById; llvm::SmallVector spirvIntrinsicTypes; diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 7ec1375bde..f49a295610 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H @@ -67,6 +71,10 @@ class SpirvInstruction { IK_ConstantComposite, IK_ConstantNull, + // Pointer <-> uint conversions. 
+ IK_ConvertPtrToU, + IK_ConvertUToPtr, + // OpUndef IK_Undef, @@ -1306,6 +1314,50 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConvertPtrToU : public SpirvInstruction { +public: + SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertPtrToU) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertPtrToU; + } + + bool operator==(const SpirvConvertPtrToU &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPtr() const { return ptr; } + +private: + SpirvInstruction *ptr; +}; + +class SpirvConvertUToPtr : public SpirvInstruction { +public: + SpirvConvertUToPtr(SpirvInstruction *intValue, QualType type, + SourceLocation loc = {}, SourceRange range = {}); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConvertUToPtr) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConvertUToPtr; + } + + bool operator==(const SpirvConvertUToPtr &that) const; + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getVal() const { return val; } + +private: + SpirvInstruction *val; +}; + class SpirvUndef : public SpirvInstruction { public: SpirvUndef(QualType type); @@ -1514,7 +1566,8 @@ class SpirvFunctionCall : public SpirvInstruction { /// \brief OpGroupNonUniform* instructions class SpirvGroupNonUniformOp : public SpirvInstruction { public: - SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, spv::Scope scope, + SpirvGroupNonUniformOp(spv::Op opcode, QualType resultType, + llvm::Optional scope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional group); @@ -1528,7 +1581,8 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { bool invokeVisitor(Visitor *v) override; - spv::Scope getExecutionScope() const { return execScope; } + 
bool hasExecutionScope() const { return execScope.hasValue(); } + spv::Scope getExecutionScope() const { return execScope.getValue(); } llvm::ArrayRef getOperands() const { return operands; } @@ -1546,7 +1600,7 @@ class SpirvGroupNonUniformOp : public SpirvInstruction { } private: - spv::Scope execScope; + llvm::Optional execScope; llvm::SmallVector operands; llvm::Optional groupOp; }; diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 221f01e5ff..00a00ef238 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H @@ -53,6 +56,7 @@ class SpirvType { TK_RuntimeArray, TK_Struct, TK_Pointer, + TK_ForwardPointer, TK_Function, TK_AccelerationStructureNV, TK_RayQueryKHR, @@ -387,6 +391,26 @@ class SpirvPointerType : public SpirvType { spv::StorageClass storageClass; }; +/// Represents a SPIR-V forwarding pointer type. +class ForwardPointerType : public SpirvType { +public: + ForwardPointerType(QualType pointee) + : SpirvType(TK_ForwardPointer), pointeeType(pointee) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_ForwardPointer; + } + + const QualType getPointeeType() const { return pointeeType; } + + bool operator==(const ForwardPointerType &that) const { + return pointeeType == that.pointeeType; + } + +private: + const QualType pointeeType; +}; + /// Represents a SPIR-V function type. None of the parameters nor the return /// type is allowed to be a hybrid type. 
class FunctionType : public SpirvType { diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 303a4600a1..93682518a1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H @@ -89,6 +93,8 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) DEFINE_VISIT_METHOD(SpirvConstantNull) + DEFINE_VISIT_METHOD(SpirvConvertPtrToU) + DEFINE_VISIT_METHOD(SpirvConvertUToPtr) DEFINE_VISIT_METHOD(SpirvUndef) DEFINE_VISIT_METHOD(SpirvCompositeConstruct) DEFINE_VISIT_METHOD(SpirvCompositeExtract) diff --git a/tools/clang/include/clang/Sema/Sema.h b/tools/clang/include/clang/Sema/Sema.h index 42ab80b617..755c7e0755 100644 --- a/tools/clang/include/clang/Sema/Sema.h +++ b/tools/clang/include/clang/Sema/Sema.h @@ -3804,9 +3804,8 @@ class Sema { bool CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation Loc, UnaryExprOrTypeTrait ExprKind); void DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A); - void DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc); + void DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc); void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto); void DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 40b030b430..59d99ab4c5 100644 --- 
a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,6 +128,8 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); +bool ContainsLongVector(clang::QualType); + bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, @@ -201,7 +203,8 @@ void Indent(unsigned int Indentation, llvm::raw_ostream &Out); void GetHLSLAttributedTypes(clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, const clang::AttributedType **ppNorm, - const clang::AttributedType **ppGLC); + const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC); bool IsMatrixType(clang::Sema *self, clang::QualType type); bool IsVectorType(clang::Sema *self, clang::QualType type); diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 02125d5a84..0a688c03fa 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the ASTContext interface for HLSL. 
// // // /////////////////////////////////////////////////////////////////////////////// @@ -23,6 +26,7 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/HlslBuiltinTypeDeclBuilder.h" #include "clang/AST/TypeLoc.h" +#include "clang/Basic/Specifiers.h" #include "clang/Sema/Overload.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -329,6 +333,9 @@ void hlsl::AddHLSLMatrixTemplate(ASTContext &context, typeDeclBuilder.addField("h", vectorArrayType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLMatrixAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to rowcount-1, and returns // a vector of colcount elements. const unsigned int templateDepth = 0; @@ -385,6 +392,9 @@ void hlsl::AddHLSLVectorTemplate(ASTContext &context, // Add an 'h' field to hold the handle. typeDeclBuilder.addField("h", vectorType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLVectorAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to colcount-1, and returns // a scalar. 
@@ -525,20 +535,33 @@ hlsl::DeclareRecordTypeWithHandleAndNoMemberFunctions(ASTContext &context, /// CXXRecordDecl * hlsl::DeclareRecordTypeWithHandle(ASTContext &context, StringRef name, - bool isCompleteType /*= true */) { + bool isCompleteType /*= true */, + InheritableAttr *Attr) { BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), name, TagDecl::TagKind::TTK_Struct); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + if (isCompleteType) return typeDeclBuilder.completeDefinition(); return typeDeclBuilder.getRecordDecl(); } +AvailabilityAttr *ConstructAvailabilityAttribute(clang::ASTContext &context, + VersionTuple Introduced) { + AvailabilityAttr *AAttr = AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(6, 9), + clang::VersionTuple(), clang::VersionTuple(), false, ""); + return AAttr; +} + // creates a global static constant unsigned integer with value. 
// equivalent to: static const uint name = val; static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, - StringRef name, unsigned val) { + StringRef name, unsigned val, + AvailabilityAttr *AAttr = nullptr) { IdentifierInfo &Id = context.Idents.get(name, tok::TokenKind::identifier); QualType type = context.getConstType(context.UnsignedIntTy); VarDecl *varDecl = VarDecl::Create(context, DC, NoLoc, NoLoc, &Id, type, @@ -548,6 +571,9 @@ static void AddConstUInt(clang::ASTContext &context, DeclContext *DC, context, llvm::APInt(context.getIntWidth(type), val), type, NoLoc); varDecl->setInit(exprVal); varDecl->setImplicit(true); + if (AAttr) + varDecl->addAttr(AAttr); + DC->addDecl(varDecl); } @@ -560,6 +586,7 @@ static void AddConstUInt(clang::ASTContext &context, StringRef name, struct Enumerant { StringRef name; unsigned value; + AvailabilityAttr *avail = nullptr; }; static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, @@ -575,33 +602,45 @@ static void AddTypedefPseudoEnum(ASTContext &context, StringRef name, enumDecl->setImplicit(true); // static const uint = ; for (const Enumerant &enumerant : enumerants) { - AddConstUInt(context, curDC, enumerant.name, enumerant.value); + AddConstUInt(context, curDC, enumerant.name, enumerant.value, + enumerant.avail); } } /// Adds all constants and enums for ray tracing void hlsl::AddRaytracingConstants(ASTContext &context) { + + // Create aversion tuple for availability attributes + // for the RAYQUERY_FLAG enum + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "RAY_FLAG", - { - {"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, - {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, - {"RAY_FLAG_FORCE_NON_OPAQUE", - (unsigned)DXIL::RayFlag::ForceNonOpaque}, - {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", - (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, - {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", - (unsigned)DXIL::RayFlag::SkipClosestHitShader}, - 
{"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, - {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", - (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, - {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, - {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, - {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, - {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", - (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, - }); + {{"RAY_FLAG_NONE", (unsigned)DXIL::RayFlag::None}, + {"RAY_FLAG_FORCE_OPAQUE", (unsigned)DXIL::RayFlag::ForceOpaque}, + {"RAY_FLAG_FORCE_NON_OPAQUE", (unsigned)DXIL::RayFlag::ForceNonOpaque}, + {"RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH", + (unsigned)DXIL::RayFlag::AcceptFirstHitAndEndSearch}, + {"RAY_FLAG_SKIP_CLOSEST_HIT_SHADER", + (unsigned)DXIL::RayFlag::SkipClosestHitShader}, + {"RAY_FLAG_CULL_BACK_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullBackFacingTriangles}, + {"RAY_FLAG_CULL_FRONT_FACING_TRIANGLES", + (unsigned)DXIL::RayFlag::CullFrontFacingTriangles}, + {"RAY_FLAG_CULL_OPAQUE", (unsigned)DXIL::RayFlag::CullOpaque}, + {"RAY_FLAG_CULL_NON_OPAQUE", (unsigned)DXIL::RayFlag::CullNonOpaque}, + {"RAY_FLAG_SKIP_TRIANGLES", (unsigned)DXIL::RayFlag::SkipTriangles}, + {"RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES", + (unsigned)DXIL::RayFlag::SkipProceduralPrimitives}, + {"RAY_FLAG_FORCE_OMM_2_STATE", (unsigned)DXIL::RayFlag::ForceOMM2State, + ConstructAvailabilityAttribute(context, VT69)}}); + + AddTypedefPseudoEnum( + context, "RAYQUERY_FLAG", + {{"RAYQUERY_FLAG_NONE", (unsigned)DXIL::RayQueryFlag::None}, + {"RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS", + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)}}); AddTypedefPseudoEnum( context, "COMMITTED_STATUS", @@ -663,6 +702,10 @@ void hlsl::AddRaytracingConstants(ASTContext &context) { AddConstUInt( context, StringRef("RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"), 
(unsigned)DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives); + AddConstUInt(context, context.getTranslationUnitDecl(), + StringRef("RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"), + (unsigned)DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps, + ConstructAvailabilityAttribute(context, VT69)); } /// Adds all constants and enums for sampler feedback @@ -675,6 +718,8 @@ void hlsl::AddSamplerFeedbackConstants(ASTContext &context) { /// Adds all enums for Barrier intrinsic void hlsl::AddBarrierConstants(ASTContext &context) { + VersionTuple VT69 = VersionTuple(6, 9); + AddTypedefPseudoEnum( context, "MEMORY_TYPE_FLAG", {{"UAV_MEMORY", (unsigned)DXIL::MemoryTypeFlag::UavMemory}, @@ -687,7 +732,9 @@ void hlsl::AddBarrierConstants(ASTContext &context) { context, "BARRIER_SEMANTIC_FLAG", {{"GROUP_SYNC", (unsigned)DXIL::BarrierSemanticFlag::GroupSync}, {"GROUP_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::GroupScope}, - {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}}); + {"DEVICE_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::DeviceScope}, + {"REORDER_SCOPE", (unsigned)DXIL::BarrierSemanticFlag::ReorderScope, + ConstructAvailabilityAttribute(context, VT69)}}); } static Expr *IntConstantAsBoolExpr(clang::Sema &sema, uint64_t value) { @@ -915,6 +962,7 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef name, uint8_t templateArgCount, TypeSourceInfo *defaultTypeArgValue, InheritableAttr *Attr) { + DXASSERT(templateArgCount != 0, "otherwise caller should be creating a class or struct"); DXASSERT(templateArgCount <= 2, "otherwise the function needs to be updated " @@ -938,11 +986,9 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( QualType elementType = context.getTemplateTypeParmType( /*templateDepth*/ 0, 0, ParameterPackFalse, elementTemplateParamDecl); - if (templateArgCount > 1 && - // Only need array type for inputpatch and outputpatch. 
- // Avoid Texture2DMS which may use 0 count. - // TODO: use hlsl types to do the check. - !name.startswith("Texture") && !name.startswith("RWTexture")) { + // Only need array type for inputpatch and outputpatch. + if (Attr && isa(Attr)) { + DXASSERT(templateArgCount == 2, "Tess patches need 2 template params"); Expr *countExpr = DeclRefExpr::Create( context, NestedNameSpecifierLoc(), NoLoc, countTemplateParamDecl, false, DeclarationNameInfo(countTemplateParamDecl->getDeclName(), NoLoc), @@ -1033,10 +1079,51 @@ static void CreateConstructorDeclaration( (*constructorDecl)->setAccess(AccessSpecifier::AS_public); } +CXXConstructorDecl *hlsl::CreateConstructorDeclarationWithParams( + ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, + ArrayRef paramTypes, ArrayRef paramNames, + DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DXASSERT_NOMSG(recordDecl != nullptr); + DXASSERT_NOMSG(!resultType.isNull()); + DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); + + TypeSourceInfo *tinfo; + CXXConstructorDecl *constructorDecl; + CreateConstructorDeclaration(context, recordDecl, resultType, paramTypes, + declarationName, isConst, &constructorDecl, + &tinfo); + + // Create and associate parameters to constructor. 
+ SmallVector parmVarDecls; + if (!paramTypes.empty()) { + for (unsigned int i = 0; i < paramTypes.size(); ++i) { + IdentifierInfo *argIi = &context.Idents.get(paramNames[i]); + ParmVarDecl *parmVarDecl = ParmVarDecl::Create( + context, constructorDecl, NoLoc, NoLoc, argIi, paramTypes[i], + context.getTrivialTypeSourceInfo(paramTypes[i], NoLoc), + StorageClass::SC_None, nullptr); + parmVarDecl->setScopeInfo(0, i); + DXASSERT(parmVarDecl->getFunctionScopeIndex() == i, + "otherwise failed to set correct index"); + parmVarDecls.push_back(parmVarDecl); + } + constructorDecl->setParams(ArrayRef(parmVarDecls)); + AssociateParametersToFunctionPrototype(tinfo, &parmVarDecls.front(), + parmVarDecls.size()); + } + + // If this is going to be part of a template function decl, don't add it to + // the record because the template function decl will be added instead. + if (!isTemplateFunction) + recordDecl->addDecl(constructorDecl); + + return constructorDecl; +} + static void CreateObjectFunctionDeclaration( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef args, DeclarationName declarationName, bool isConst, - CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { + StorageClass SC, CXXMethodDecl **functionDecl, TypeSourceInfo **tinfo) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(functionDecl != nullptr); @@ -1048,8 +1135,8 @@ static void CreateObjectFunctionDeclaration( *tinfo = context.getTrivialTypeSourceInfo(functionQT, NoLoc); DXASSERT_NOMSG(*tinfo != nullptr); *functionDecl = CXXMethodDecl::Create( - context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, - StorageClass::SC_None, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo, SC, + InlineSpecifiedFalse, IsConstexprFalse, NoLoc); DXASSERT_NOMSG(*functionDecl != nullptr); (*functionDecl)->setLexicalDeclContext(recordDecl); (*functionDecl)->setAccess(AccessSpecifier::AS_public); @@ -1058,7 +1145,8 @@ static void 
CreateObjectFunctionDeclaration( CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( ASTContext &context, CXXRecordDecl *recordDecl, QualType resultType, ArrayRef paramTypes, ArrayRef paramNames, - DeclarationName declarationName, bool isConst, bool isTemplateFunction) { + DeclarationName declarationName, bool isConst, StorageClass SC, + bool isTemplateFunction) { DXASSERT_NOMSG(recordDecl != nullptr); DXASSERT_NOMSG(!resultType.isNull()); DXASSERT_NOMSG(paramTypes.size() == paramNames.size()); @@ -1066,7 +1154,7 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( TypeSourceInfo *tinfo; CXXMethodDecl *functionDecl; CreateObjectFunctionDeclaration(context, recordDecl, resultType, paramTypes, - declarationName, isConst, &functionDecl, + declarationName, isConst, SC, &functionDecl, &tinfo); // Create and associate parameters to method. @@ -1098,41 +1186,50 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandle( ASTContext &context, StringRef typeName, StringRef templateParamName, - TagTypeKind tagKind) { + InheritableAttr *Attr) { return DeclareUIntTemplatedTypeWithHandleInDeclContext( context, context.getTranslationUnitDecl(), typeName, templateParamName, - tagKind); + Attr); } CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, - StringRef templateParamName, TagTypeKind tagKind) { + StringRef templateParamName, InheritableAttr *Attr) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, tagKind); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, + TagTypeKind::TTK_Class); typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. 
+ if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + return typeDeclBuilder.getRecordDecl(); } clang::CXXRecordDecl * -hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { +hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, + InheritableAttr *Attr) { // Create ConstantBufferView template declaration in translation unit scope // like other resource. // template ConstantBuffer { int h; } DeclContext *DC = context.getTranslationUnitDecl(); + DXASSERT(Attr, "Constbuffer types require an attribute"); - BuiltinTypeDeclBuilder typeDeclBuilder( - DC, bTBuf ? "TextureBuffer" : "ConstantBuffer", - TagDecl::TagKind::TTK_Struct); + const char *TypeName = "ConstantBuffer"; + if (IsTBuffer(cast(Attr)->getResKind())) + TypeName = "TextureBuffer"; + BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, + TagDecl::TagKind::TTK_Struct); (void)typeDeclBuilder.addTypeTemplateParam("T"); typeDeclBuilder.startDefinition(); CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + typeDeclBuilder.getRecordDecl()->addAttr(Attr); typeDeclBuilder.getRecordDecl(); @@ -1143,7 +1240,14 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { // template RayQuery { ... 
} BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), "RayQuery"); - typeDeclBuilder.addIntegerTemplateParam("flags", context.UnsignedIntTy); + typeDeclBuilder.addIntegerTemplateParam("constRayFlags", + context.UnsignedIntTy); + // create an optional second template argument with default value + // that contains the value of DXIL::RayFlag::None + llvm::Optional DefaultRayQueryFlag = + static_cast(DXIL::RayFlag::None); + typeDeclBuilder.addIntegerTemplateParam( + "RayQueryFlags", context.UnsignedIntTy, DefaultRayQueryFlag); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. @@ -1160,10 +1264,51 @@ CXXRecordDecl *hlsl::DeclareRayQueryType(ASTContext &context) { context.DeclarationNames.getCXXConstructorName(canQualType), false, &pConstructorDecl, &pTypeSourceInfo); typeDeclBuilder.getRecordDecl()->addDecl(pConstructorDecl); - + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLRayQueryObjectAttr::CreateImplicit(context)); return typeDeclBuilder.getRecordDecl(); } +CXXRecordDecl *hlsl::DeclareHitObjectType(NamespaceDecl &NSDecl) { + ASTContext &Context = NSDecl.getASTContext(); + // HitObject { ... } + BuiltinTypeDeclBuilder TypeDeclBuilder(&NSDecl, "HitObject"); + TypeDeclBuilder.startDefinition(); + + // Add handle to mark as HLSL object. + TypeDeclBuilder.addField("h", GetHLSLObjectHandleType(Context)); + CXXRecordDecl *RecordDecl = TypeDeclBuilder.getRecordDecl(); + + CanQualType canQualType = Context.getCanonicalType( + Context.getRecordType(TypeDeclBuilder.getRecordDecl())); + + // Add constructor that will be lowered to MOP_HitObject_MakeNop. 
+ CXXConstructorDecl *pConstructorDecl = nullptr; + TypeSourceInfo *pTypeSourceInfo = nullptr; + CreateConstructorDeclaration( + Context, RecordDecl, Context.VoidTy, {}, + Context.DeclarationNames.getCXXConstructorName(canQualType), false, + &pConstructorDecl, &pTypeSourceInfo); + RecordDecl->addDecl(pConstructorDecl); + pConstructorDecl->addAttr(HLSLIntrinsicAttr::CreateImplicit( + Context, "op", "", + static_cast(hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop))); + pConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(Context)); + + // Add AvailabilityAttribute for SM6.9+ + VersionTuple VT69 = VersionTuple(6, 9); + RecordDecl->addAttr(ConstructAvailabilityAttribute(Context, VT69)); + + // Add the implicit HLSLHitObjectAttr attribute to unambiguously recognize the + // builtin HitObject type. + RecordDecl->addAttr(HLSLHitObjectAttr::CreateImplicit(Context)); + RecordDecl->setImplicit(true); + + // Add to namespace + RecordDecl->setDeclContext(&NSDecl); + return RecordDecl; +} + CXXRecordDecl *hlsl::DeclareResourceType(ASTContext &context, bool bSampler) { // struct ResourceDescriptor { uint8 desc; } StringRef Name = bSampler ? 
".Sampler" : ".Resource"; @@ -1227,6 +1372,49 @@ CXXRecordDecl *hlsl::DeclareNodeOrRecordType( } #ifdef ENABLE_SPIRV_CODEGEN +CXXRecordDecl *hlsl::DeclareVkBufferPointerType(ASTContext &context, + DeclContext *declContext) { + BuiltinTypeDeclBuilder Builder(declContext, "BufferPointer", + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *TyParamDecl = + Builder.addTypeTemplateParam("recordtype"); + Builder.addIntegerTemplateParam("alignment", context.UnsignedIntTy, 0); + + Builder.startDefinition(); + + QualType paramType = QualType(TyParamDecl->getTypeForDecl(), 0); + CXXRecordDecl *recordDecl = Builder.getRecordDecl(); + + CXXMethodDecl *methodDecl = CreateObjectFunctionDeclarationWithParams( + context, recordDecl, context.getLValueReferenceType(paramType), {}, {}, + DeclarationName(&context.Idents.get("Get")), true); + CanQualType canQualType = + recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified(); + auto *copyConstructorDecl = CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, + {context.getRValueReferenceType(canQualType)}, {"bufferPointer"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + auto *addressConstructorDecl = CreateConstructorDeclarationWithParams( + context, recordDecl, context.VoidTy, {context.UnsignedIntTy}, {"address"}, + context.DeclarationNames.getCXXConstructorName(canQualType), false, true); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, copyConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); + hlsl::CreateFunctionTemplateDecl( + context, recordDecl, addressConstructorDecl, + Builder.getTemplateDecl()->getTemplateParameters()->begin(), 2); + + StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); + unsigned Opcode = static_cast(IntrinsicOp::MOP_GetBufferContents); + methodDecl->addAttr( + HLSLIntrinsicAttr::CreateImplicit(context, OpcodeGroup, "", Opcode)); + 
methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + copyConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + addressConstructorDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(context)); + + return Builder.completeDefinition(); +} + CXXRecordDecl *hlsl::DeclareInlineSpirvType(clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index 9ef771b932..baed44667f 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -48,34 +48,33 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const { } CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) - : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), - Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), - Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), - HasPrivateFields(false), HasProtectedFields(false), HasPublicFields(false), - HasMutableFields(false), HasVariantMembers(false), HasOnlyCMembers(true), - HasInClassInitializer(false), HasUninitializedReferenceMember(false), - NeedOverloadResolutionForMoveConstructor(false), - NeedOverloadResolutionForMoveAssignment(false), - NeedOverloadResolutionForDestructor(false), - DefaultedMoveConstructorIsDeleted(false), - DefaultedMoveAssignmentIsDeleted(false), - DefaultedDestructorIsDeleted(false), - HasTrivialSpecialMembers(SMF_All), - DeclaredNonTrivialSpecialMembers(0), - HasIrrelevantDestructor(true), - HasConstexprNonCopyMoveConstructor(false), - DefaultedDefaultConstructorIsConstexpr(true), - HasConstexprDefaultConstructor(false), - HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), - UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), - ImplicitCopyConstructorHasConstParam(true), - ImplicitCopyAssignmentHasConstParam(true), - HasDeclaredCopyConstructorWithConstParam(false), - 
HasDeclaredCopyAssignmentWithConstParam(false), - IsLambda(false), IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), - Bases(), VBases(), - Definition(D), FirstFriend() { -} + // HLSL Change Begin - Add HasLongVector and clang-format + : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), + Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), + Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), + HasPrivateFields(false), HasProtectedFields(false), + HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false), + HasOnlyCMembers(true), HasInClassInitializer(false), + HasUninitializedReferenceMember(false), + NeedOverloadResolutionForMoveConstructor(false), + NeedOverloadResolutionForMoveAssignment(false), + NeedOverloadResolutionForDestructor(false), + DefaultedMoveConstructorIsDeleted(false), + DefaultedMoveAssignmentIsDeleted(false), + DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All), + DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true), + HasConstexprNonCopyMoveConstructor(false), + DefaultedDefaultConstructorIsConstexpr(true), + HasConstexprDefaultConstructor(false), + HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), + UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), + ImplicitCopyConstructorHasConstParam(true), + ImplicitCopyAssignmentHasConstParam(true), + HasDeclaredCopyConstructorWithConstParam(false), + HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), + IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), + NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} +// HLSL Change End - Add HasLongVector and clang-format CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return Bases.get(Definition->getASTContext().getExternalSource()); @@ -204,6 +203,11 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if 
(!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; + // HLSL Change Begin - Propagate presence of long vector to child classes. + if (BaseClassDecl->hasHLSLLongVector()) + data().HasHLSLLongVector = true; + // HLSL Change End + // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) data().HasNonLiteralTypeFieldsOrBases = true; @@ -385,6 +389,11 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } + + // HLSL Change Begin - Propagate presence of long vector to child classes. + if (Subobj->hasHLSLLongVector()) + data().HasHLSLLongVector = true; + // HLSL Change End } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index 0e2ec8c6c2..c6dc21217e 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. 
@@ -1716,7 +1719,11 @@ const char *CastExpr::getCastKindName() const { return "HLSLCC_FloatingToBoolean"; case CK_HLSLCC_FloatingCast: return "HLSLCC_FloatingCast"; - // HLSL Change Ends + case CK_VK_BufferPointerToIntegral: + return "VK_BufferPointerToIntegral"; + case CK_VK_IntegralToBufferPointer: + return "VK_IntegralToBufferPointer"; + // HLSL Change Ends } llvm_unreachable("Unhandled cast kind!"); diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 5e8d4700bd..69e0760bce 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. @@ -7829,6 +7832,12 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { return false; return Success(Value, E); } + + // HLSL Change Starts + case CK_VK_BufferPointerToIntegral: { + return false; + // HLSL Change Ends + } } llvm_unreachable("unknown cast resulting in integral value"); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index d83b307463..5b19e064a3 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,6 +5,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
// // /// /// \file // @@ -53,44 +56,33 @@ ConvertHLSLVecMatTypeToExtVectorType(const clang::ASTContext &context, return nullptr; } -bool IsHLSLVecMatType(clang::QualType type) { - const Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } else if (templateDecl->getName() == "matrix") { - return true; - } - } +template static AttrType *getAttr(clang::QualType type) { + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr(); } - return false; + return nullptr; +} + +bool IsHLSLVecMatType(clang::QualType type) { + return getAttr(type) || getAttr(type); } bool IsHLSLMatType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "matrix") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } bool IsHLSLVecType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } @@ -286,6 +278,18 @@ bool HasHLSLGloballyCoherent(clang::QualType type) { return false; } +bool HasHLSLReorderCoherent(clang::QualType type) { + const AttributedType *AT = type->getAs(); + while (AT) { + AttributedType::Kind kind = 
AT->getAttrKind(); + if (kind == AttributedType::attr_hlsl_reordercoherent) + return true; + AT = AT->getLocallyUnqualifiedSingleStepDesugaredType() + ->getAs(); + } + return false; +} + /// Checks whether the pAttributes indicate a parameter is inout or out; if /// inout, pIsIn will be set to true. bool IsParamAttributedAsOut(clang::AttributeList *pAttributes, bool *pIsIn); @@ -474,160 +478,56 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { QualType elemTy = arg0.getAsType(); return elemTy; } + // TODO: Add type cache to ASTContext. bool IsHLSLInputPatchType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "InputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return Attr->getIsInput(); return false; } + bool IsHLSLOutputPatchType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "OutputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return !Attr->getIsInput(); return false; } + bool IsHLSLPointStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Point; return false; } + bool IsHLSLLineStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - 
dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "LineStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Line; return false; } + bool IsHLSLTriangleStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Triangle; return false; } + bool IsHLSLStreamOutputType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - if (templateDecl->getName() == "LineStream") - return true; - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (getAttr(type)) + return true; return false; } -bool IsHLSLResourceType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "Texture1D" || name == "RWTexture1D") - return true; - if (name == "Texture2D" || name == "RWTexture2D") - return true; - if (name == "Texture2DMS" || name == "RWTexture2DMS") - return true; - if (name == "Texture3D" || name == "RWTexture3D") - return true; - if (name == "TextureCube" || name == "RWTextureCube") - return true; - if (name == "Texture1DArray" || name == "RWTexture1DArray") - return true; - if (name == "Texture2DArray" || name == "RWTexture2DArray") - return true; - if (name == "Texture2DMSArray" || name == "RWTexture2DMSArray") - return true; - if (name == "TextureCubeArray" || name == "RWTextureCubeArray") - return true; - - 
if (name == "FeedbackTexture2D" || name == "FeedbackTexture2DArray") - return true; - - if (name == "RasterizerOrderedTexture1D" || - name == "RasterizerOrderedTexture2D" || - name == "RasterizerOrderedTexture3D" || - name == "RasterizerOrderedTexture1DArray" || - name == "RasterizerOrderedTexture2DArray" || - name == "RasterizerOrderedBuffer" || - name == "RasterizerOrderedByteAddressBuffer" || - name == "RasterizerOrderedStructuredBuffer") - return true; - - if (name == "ByteAddressBuffer" || name == "RWByteAddressBuffer") - return true; - - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - - if (name == "Buffer" || name == "RWBuffer") - return true; - - if (name == "SamplerState" || name == "SamplerComparisonState") - return true; - - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - - if (name == "RaytracingAccelerationStructure") - return true; - } +bool IsHLSLResourceType(clang::QualType type) { + if (getAttr(type)) + return true; return false; } -static HLSLNodeObjectAttr *getNodeAttr(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr(); - } - return nullptr; +bool IsHLSLHitObjectType(QualType type) { + return nullptr != getAttr(type); } DXIL::NodeIOKind GetNodeIOType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return Attr->getNodeIOType(); return DXIL::NodeIOKind::Invalid; } @@ -654,27 +554,20 @@ bool IsHLSLDynamicSamplerType(clang::QualType type) { } bool IsHLSLNodeType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if 
(const HLSLNodeObjectAttr *Attr = getAttr(type)) return true; return false; } bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - // Read-only records - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } @@ -701,14 +594,8 @@ bool IsHLSLNodeOutputType(clang::QualType type) { } bool IsHLSLStructuredBufferType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResKind() == DXIL::ResourceKind::StructuredBuffer; return false; } @@ -812,64 +699,20 @@ bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKind, DXIL::HitGroupType &hgType) { - hgType = (DXIL::HitGroupType)(-1); type = type.getCanonicalType(); if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - switch (name.size()) { - case 17: - return name == "StateObjectConfig" - ? (subobjectKind = DXIL::SubobjectKind::StateObjectConfig, - true) - : false; - case 18: - return name == "LocalRootSignature" - ? 
(subobjectKind = DXIL::SubobjectKind::LocalRootSignature, - true) - : false; - case 19: - return name == "GlobalRootSignature" - ? (subobjectKind = DXIL::SubobjectKind::GlobalRootSignature, - true) - : false; - case 29: - return name == "SubobjectToExportsAssociation" - ? (subobjectKind = - DXIL::SubobjectKind::SubobjectToExportsAssociation, - true) - : false; - case 22: - return name == "RaytracingShaderConfig" - ? (subobjectKind = DXIL::SubobjectKind::RaytracingShaderConfig, - true) - : false; - case 24: - return name == "RaytracingPipelineConfig" - ? (subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig, - true) - : false; - case 25: - return name == "RaytracingPipelineConfig1" - ? (subobjectKind = - DXIL::SubobjectKind::RaytracingPipelineConfig1, - true) - : false; - case 16: - if (name == "TriangleHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::Triangle; - return true; - } - return false; - case 27: - if (name == "ProceduralPrimitiveHitGroup") { - subobjectKind = DXIL::SubobjectKind::HitGroup; - hgType = DXIL::HitGroupType::ProceduralPrimitive; - return true; - } + RecordDecl *RD = RT->getDecl(); + if (!RD->hasAttr()) { return false; } + + HLSLSubObjectAttr *Attr = RD->getAttr(); + subobjectKind = static_cast(Attr->getSubObjKindUint()); + hgType = static_cast(Attr->getHitGroupType()); + if (subobjectKind == DXIL::SubobjectKind::HitGroup) + DXASSERT(DXIL::IsValidHitGroupType(hgType), "invalid hit group type"); + + return true; } return false; } @@ -906,6 +749,50 @@ bool IsHLSLRayQueryType(clang::QualType type) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static llvm::Optional> +MaybeGetVKBufferPointerParams(clang::QualType type) { + const RecordType *RT = dyn_cast(type.getCanonicalType()); + if (!RT) + return llvm::None; + + const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getAsCXXRecordDecl()); + if (!templateDecl || !templateDecl->getName().equals("BufferPointer")) + return 
llvm::None; + + auto *namespaceDecl = + dyn_cast_or_null(templateDecl->getDeclContext()); + if (!namespaceDecl || !namespaceDecl->getName().equals("vk")) + return llvm::None; + + const TemplateArgumentList &argList = templateDecl->getTemplateArgs(); + QualType bufferType = argList[0].getAsType(); + unsigned align = + argList.size() > 1 ? argList[1].getAsIntegral().getLimitedValue() : 0; + return std::make_pair(bufferType, align); +} + +bool IsVKBufferPointerType(clang::QualType type) { + return MaybeGetVKBufferPointerParams(type).hasValue(); +} + +QualType GetVKBufferPointerBufferType(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert(bpParams.hasValue() && + "cannot get pointer type for type that is not a vk::BufferPointer"); + return bpParams.getValue().first; +} + +unsigned GetVKBufferPointerAlignment(clang::QualType type) { + auto bpParams = MaybeGetVKBufferPointerParams(type); + assert( + bpParams.hasValue() && + "cannot get pointer alignment for type that is not a vk::BufferPointer"); + return bpParams.getValue().second; +} +#endif + QualType GetHLSLResourceResultType(QualType type) { // Don't canonicalize the type as to not lose snorm in Buffer const RecordType *RT = type->getAs(); @@ -914,7 +801,8 @@ QualType GetHLSLResourceResultType(QualType type) { if (const ClassTemplateSpecializationDecl *templateDecl = dyn_cast(RD)) { - if (RD->getName().startswith("FeedbackTexture")) { + const HLSLResourceAttr *Attr = getAttr(type); + if (Attr && DXIL::IsFeedbackTexture(Attr->getResKind())) { // Feedback textures are write-only and the data is opaque, // so there is no result type per se. 
return {}; diff --git a/tools/clang/lib/AST/Type.cpp b/tools/clang/lib/AST/Type.cpp index 06db4747ff..51c20218cc 100644 --- a/tools/clang/lib/AST/Type.cpp +++ b/tools/clang/lib/AST/Type.cpp @@ -2945,6 +2945,7 @@ bool AttributedType::isHLSLTypeSpec() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: + case attr_hlsl_reordercoherent: return true; } llvm_unreachable("invalid attr kind"); @@ -2975,7 +2976,8 @@ bool AttributedType::isCallingConv() const { case attr_hlsl_snorm: case attr_hlsl_unorm: case attr_hlsl_globallycoherent: - // HLSL Change Ends + case attr_hlsl_reordercoherent: + // HLSL Change Ends return false; case attr_pcs: diff --git a/tools/clang/lib/AST/TypePrinter.cpp b/tools/clang/lib/AST/TypePrinter.cpp index 621e1d46a0..ca9e15bfd7 100644 --- a/tools/clang/lib/AST/TypePrinter.cpp +++ b/tools/clang/lib/AST/TypePrinter.cpp @@ -1174,6 +1174,9 @@ void TypePrinter::printAttributedBefore(const AttributedType *T, case AttributedType::attr_hlsl_globallycoherent: OS << "globallycoherent "; break; + case AttributedType::attr_hlsl_reordercoherent: + OS << "reordercoherent "; + break; default: // Only HLSL attribute types are covered. 
break; diff --git a/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 206f7d9523..d947887d62 100644 --- a/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1047,8 +1047,17 @@ bool CGDebugInfo::TryCollectHLSLRecordElements(const RecordType *Ty, unsigned VecSize = hlsl::GetHLSLVecSize(QualTy); unsigned ElemSizeInBits = CGM.getContext().getTypeSize(ElemQualTy); unsigned CurrentAlignedOffset = 0; + SmallString<8> FieldNameBuf; for (unsigned ElemIdx = 0; ElemIdx < VecSize; ++ElemIdx) { - StringRef FieldName = StringRef(&"xyzw"[ElemIdx], 1); + StringRef FieldName; + if (VecSize <= 4) { + FieldName = StringRef(&"xyzw"[ElemIdx], 1); + } else { + FieldNameBuf.clear(); + llvm::raw_svector_ostream OS(FieldNameBuf); + OS << 'c' << ElemIdx; + FieldName = OS.str(); + } CurrentAlignedOffset = llvm::RoundUpToAlignment(CurrentAlignedOffset, AlignBits); llvm::DIType *FieldType = diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp index 0cb993e6f4..530c791fcc 100644 --- a/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -3713,20 +3713,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); - if (llvm::VectorType *VT = dyn_cast(CondV->getType())) { - llvm::VectorType *ResultVT = cast(LHS->getType()); - llvm::Value *result = llvm::UndefValue::get(ResultVT); - for (unsigned i = 0; i < VT->getNumElements(); i++) { - llvm::Value *EltCond = Builder.CreateExtractElement(CondV, i); - llvm::Value *EltL = Builder.CreateExtractElement(LHS, i); - llvm::Value *EltR = Builder.CreateExtractElement(RHS, i); - llvm::Value *EltSelect = Builder.CreateSelect(EltCond, EltL, EltR); - result = Builder.CreateInsertElement(result, EltSelect, i); - } - return result; - } else { - return 
Builder.CreateSelect(CondV, LHS, RHS); - } + return Builder.CreateSelect(CondV, LHS, RHS); } if (hlsl::IsHLSLMatType(E->getType())) { llvm::Value *Cond = CGF.EmitScalarExpr(condExpr); diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 29ed954425..16ddeaec60 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -300,7 +300,7 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { clang::QualType QaulTy) override; void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) override; - const clang::Expr *CheckReturnStmtGLCMismatch( + const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) @@ -2500,9 +2500,11 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { // Type annotation for this pointer. if (const CXXMethodDecl *MFD = dyn_cast(FD)) { - const CXXRecordDecl *RD = MFD->getParent(); - QualType Ty = CGM.getContext().getTypeDeclType(RD); - AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + if (!MFD->isStatic()) { + const CXXRecordDecl *RD = MFD->getParent(); + QualType Ty = CGM.getContext().getTypeDeclType(RD); + AddTypeAnnotation(Ty, dxilTypeSys, arrayEltSize); + } } for (const ValueDecl *param : FD->params()) { @@ -2801,16 +2803,20 @@ void CGMSHLSLRuntime::MarkPotentialResourceTemp(CodeGenFunction &CGF, AddValToPropertyMap(V, QualTy); } -static bool isGLCMismatch(QualType Ty0, QualType Ty1, const Expr *SrcExp, - clang::SourceLocation Loc, DiagnosticsEngine &Diags) { - if (HasHLSLGloballyCoherent(Ty0) == HasHLSLGloballyCoherent(Ty1)) - return false; +static std::pair getCoherenceMismatch(QualType Ty0, QualType Ty1, + const Expr *SrcExp) { + std::pair Mismatch{ + HasHLSLGloballyCoherent(Ty0) != HasHLSLGloballyCoherent(Ty1), + HasHLSLReorderCoherent(Ty0) != HasHLSLReorderCoherent(Ty1)}; + if (!Mismatch.first && 
!Mismatch.second) + return {false, false}; + if (const CastExpr *Cast = dyn_cast(SrcExp)) { // Skip flat conversion which is for createHandleFromHeap. if (Cast->getCastKind() == CastKind::CK_FlatConversion) - return false; + return {false, false}; } - return true; + return Mismatch; } void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, @@ -2827,19 +2833,23 @@ void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, AddValToPropertyMap(V, D.getType()); if (D.hasInit()) { - if (isGLCMismatch(D.getType(), D.getInit()->getType(), D.getInit(), - D.getLocation(), CGM.getDiags())) { - objectProperties.updateGLC(V); + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(D.getType(), D.getInit()->getType(), D.getInit()); + + if (glcMismatch || rdcMismatch) { + objectProperties.updateCoherence(V, glcMismatch, rdcMismatch); } } } -const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtGLCMismatch( +const clang::Expr *CGMSHLSLRuntime::CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) { - if (!isGLCMismatch(RV->getType(), FnRetTy, RV, S.getReturnLoc(), - CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(RV->getType(), FnRetTy, RV); + + if (!glcMismatch && !rdcMismatch) { return RV; } const FunctionDecl *FD = cast(CGF.CurFuncDecl); @@ -2911,10 +2921,11 @@ void CGMSHLSLRuntime::addResource(Decl *D) { if (VD->hasInit() && resClass != DXIL::ResourceClass::Invalid) { if (resClass == DXIL::ResourceClass::UAV) { - if (isGLCMismatch(VD->getType(), VD->getInit()->getType(), - VD->getInit(), D->getLocation(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = getCoherenceMismatch( + VD->getType(), VD->getInit()->getType(), VD->getInit()); + if (glcMismatch || rdcMismatch) { GlobalVariable *GV = cast(CGM.GetAddrOfGlobalVar(VD)); - objectProperties.updateGLC(GV); + objectProperties.updateCoherence(GV, 
glcMismatch, rdcMismatch); } } return; @@ -3461,8 +3472,11 @@ bool CGMSHLSLRuntime::SetUAVSRV(SourceLocation loc, } } } + // 'globallycoherent' implies 'reordercoherent' if (HasHLSLGloballyCoherent(QualTy)) { hlslRes->SetGloballyCoherent(true); + } else if (HasHLSLReorderCoherent(QualTy)) { + hlslRes->SetReorderCoherent(true); } if (resClass == hlsl::DxilResourceBase::Class::SRV) { hlslRes->SetRW(false); @@ -3495,6 +3509,8 @@ uint32_t CGMSHLSLRuntime::AddUAVSRV(VarDecl *decl, if (decl->hasAttr()) { hlslRes->SetGloballyCoherent(true); } + if (decl->hasAttr()) + hlslRes->SetReorderCoherent(true); if (!SetUAVSRV(decl->getLocation(), resClass, hlslRes.get(), VarTy)) return 0; @@ -6138,8 +6154,9 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy)); bool bAnnotResource = false; if (isObject) { - if (isGLCMismatch(Param->getType(), Arg->getType(), Arg, - Arg->getExprLoc(), CGM.getDiags())) { + auto [glcMismatch, rdcMismatch] = + getCoherenceMismatch(Param->getType(), Arg->getType(), Arg); + if (glcMismatch || rdcMismatch) { // NOTE: if function is noinline, resource parameter is not allowed. // Here assume function will be always inlined. // This can only take care resource as parameter. When parameter is diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 8af96cc3cd..13edadf9df 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -2795,10 +2795,12 @@ unsigned AlignBufferOffsetInLegacy(unsigned offset, unsigned size, } // Translate RayQuery constructor. 
From: -// %call = call %"RayQuery" @(%"RayQuery" %ptr) +// %call = call %"RayQuery>" +// @(%"RayQuery" %ptr) // To: -// i32 %handle = AllocateRayQuery(i32 , i32 -// %flags) %gep = GEP %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 +// i32 %handle = AllocateRayQuery2(i32 , i32 +// %flags, i32 %constrayqueryflags <0 if not given>) %gep = GEP +// %"RayQuery" %ptr, 0, 0 store i32* %gep, i32 // %handle ; and replace uses of %call with %ptr void TranslateRayQueryConstructor(HLModule &HLM) { llvm::Module &M = *HLM.GetModule(); @@ -2822,9 +2824,13 @@ void TranslateRayQueryConstructor(HLModule &HLM) { llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext()); llvm::ConstantInt *i32Zero = llvm::ConstantInt::get(i32Ty, (uint64_t)0, false); + + // the third argument will default to 0 if the rayquery constructor doesn't + // have a second template argument llvm::FunctionType *funcTy = - llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false); + llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty, i32Ty}, false); unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery; + llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false); Function *opFunc = GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode); @@ -2839,14 +2845,22 @@ void TranslateRayQueryConstructor(HLModule &HLM) { HLM.GetTypeSystem().GetStructAnnotation(pRQType); DXASSERT(SA, "otherwise, could not find type annoation for RayQuery " "specialization"); - DXASSERT(SA->GetNumTemplateArgs() == 1 && - SA->GetTemplateArgAnnotation(0).IsIntegral(), + DXASSERT((SA->GetNumTemplateArgs() == 1 && + SA->GetTemplateArgAnnotation(0).IsIntegral()) || + (SA->GetNumTemplateArgs() == 2 && + SA->GetTemplateArgAnnotation(0).IsIntegral() && + SA->GetTemplateArgAnnotation(1).IsIntegral()), "otherwise, RayQuery has changed, or lacks template args"); llvm::IRBuilder<> Builder(CI); llvm::Value *rayFlags = Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral()); - llvm::Value *Call = - 
Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName()); + // the default val of 0 will be assigned if there is no 2nd template arg + llvm::Value *rayQueryFlags = + Builder.getInt32(SA->GetTemplateArgAnnotation(1).GetIntegral()); + + llvm::Value *Call = Builder.CreateCall( + opFunc, {opVal, rayFlags, rayQueryFlags}, pThis->getName()); + llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero}); Builder.CreateStore(Call, GEP); CI->replaceAllUsesWith(pThis); @@ -4020,12 +4034,17 @@ hlsl::DxilResourceProperties DxilObjectProperties::GetResource(llvm::Value *V) { return it->second; return DxilResourceProperties(); } -void DxilObjectProperties::updateGLC(llvm::Value *V) { +void DxilObjectProperties::updateCoherence(llvm::Value *V, + bool updateGloballyCoherent, + bool updateReorderCoherent) { auto it = resMap.find(V); if (it == resMap.end()) return; - it->second.Basic.IsGloballyCoherent ^= 1; + if (updateGloballyCoherent) + it->second.Basic.IsGloballyCoherent ^= 1; + if (updateReorderCoherent) + it->second.Basic.IsReorderCoherent ^= 1; } } // namespace CGHLSLMSHelper diff --git a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h index 9058ed4f6d..7fca5d4025 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h +++ b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h @@ -159,7 +159,8 @@ struct DxilObjectProperties { bool AddResource(llvm::Value *V, const hlsl::DxilResourceProperties &RP); bool IsResource(llvm::Value *V); hlsl::DxilResourceProperties GetResource(llvm::Value *V); - void updateGLC(llvm::Value *V); + void updateCoherence(llvm::Value *V, bool updateGloballyCoherent, + bool updateReorderCoherent); // MapVector for deterministic iteration order. 
llvm::MapVector resMap; diff --git a/tools/clang/lib/CodeGen/CGHLSLRuntime.h b/tools/clang/lib/CodeGen/CGHLSLRuntime.h index 3e27951e86..b100d93579 100644 --- a/tools/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/tools/clang/lib/CodeGen/CGHLSLRuntime.h @@ -146,7 +146,7 @@ class CGHLSLRuntime { virtual void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) = 0; - virtual const clang::Expr *CheckReturnStmtGLCMismatch( + virtual const clang::Expr *CheckReturnStmtCoherenceMismatch( CodeGenFunction &CGF, const clang::Expr *RV, const clang::ReturnStmt &S, clang::QualType FnRetTy, const std::function &TmpArgMap) = 0; diff --git a/tools/clang/lib/CodeGen/CGStmt.cpp b/tools/clang/lib/CodeGen/CGStmt.cpp index 080d824022..1b1f593271 100644 --- a/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/tools/clang/lib/CodeGen/CGStmt.cpp @@ -525,6 +525,10 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { // HLSL Change Begins. void CodeGenFunction::EmitDiscardStmt(const DiscardStmt &S) { + // Skip unreachable discard. + if (!HaveInsertPoint()) + return; + CGM.getHLSLRuntime().EmitHLSLDiscard(*this); } // HLSL Change Ends. @@ -1174,8 +1178,8 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { auto MapTemp = [&](const VarDecl *LocalVD, llvm::Value *TmpArg) { OutParamScope.addTemp(LocalVD, TmpArg); }; - RV = CGM.getHLSLRuntime().CheckReturnStmtGLCMismatch(*this, RV, S, - FnRetTy, MapTemp); + RV = CGM.getHLSLRuntime().CheckReturnStmtCoherenceMismatch( + *this, RV, S, FnRetTy, MapTemp); // HLSL Change Ends. 
CharUnits Alignment = getContext().getTypeAlignInChars(RV->getType()); EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue, Alignment, diff --git a/tools/clang/lib/CodeGen/CodeGenModule.cpp b/tools/clang/lib/CodeGen/CodeGenModule.cpp index 73ad296d47..b274ea9d64 100644 --- a/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -3376,6 +3376,12 @@ void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { + llvm::TimeTraceScope TimeScope("CGM::EmitTopLevelDecl", [&] { + if (const auto *ND = dyn_cast(D)) + return ND->getName(); + return StringRef("Unnamed decl"); + }); + // Ignore dependent declarations. if (D->getDeclContext() && D->getDeclContext()->isDependentContext()) return; diff --git a/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/tools/clang/lib/CodeGen/CodeGenTypes.cpp index d11575d359..82328c8fb5 100644 --- a/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -14,21 +14,23 @@ #include "CodeGenTypes.h" #include "CGCXXABI.h" #include "CGCall.h" +#include "CGHLSLRuntime.h" // HLSL Change #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenModule.h" // HLSL Change #include "TargetInfo.h" +#include "dxc/DXIL/DxilUtil.h" // HLSL Change #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" -#include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" // HLSL Change - clang-format #include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" // HLSL Change #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" -#include "CodeGenModule.h" // HLSL Change -#include "CGHLSLRuntime.h" // HLSL Change using namespace clang; using namespace CodeGen; @@ -365,7 +367,8 @@ llvm::Type 
*CodeGenTypes::ConvertType(QualType T) { .getConstantArrayType(eltTy, llvm::APInt(32, count), ArrayType::ArraySizeModifier::Normal, 0) .getTypePtr(); - } + } else if (hlsl::IsHLSLHitObjectType(T)) // HLSL Change + return hlsl::dxilutil::GetHLSLHitObjectType(&TheModule); else return ConvertRecordDeclType(RT->getDecl()); } diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index 64ce8c9182..ebfb93df2e 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the @@ -1080,7 +1083,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("nullability", true) .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory)) .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread)) - .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow)) + .Case("dataflow_sanitizer", + LangOpts.Sanitize.has(SanitizerKind::DataFlow)) // Objective-C features .Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE? 
.Case("objc_arc", LangOpts.ObjCAutoRefCount) @@ -1180,6 +1184,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("has_trivial_constructor", LangOpts.CPlusPlus) .Case("has_trivial_destructor", LangOpts.CPlusPlus) .Case("has_virtual_destructor", LangOpts.CPlusPlus) + .Case("hlsl_vk_buffer_pointer", LangOpts.SPIRV) .Case("is_abstract", LangOpts.CPlusPlus) .Case("is_base_of", LangOpts.CPlusPlus) .Case("is_class", LangOpts.CPlusPlus) diff --git a/tools/clang/lib/Parse/ParseAST.cpp b/tools/clang/lib/Parse/ParseAST.cpp index e06a4ee09e..c8009b9b53 100644 --- a/tools/clang/lib/Parse/ParseAST.cpp +++ b/tools/clang/lib/Parse/ParseAST.cpp @@ -100,8 +100,6 @@ void clang::ParseAST(Preprocessor &PP, ASTConsumer *Consumer, void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { - // HLSL Change - Support hierarchial time tracing. - llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); // Collect global stats on Decls/Stmts (until we have a module streamer). if (PrintStats) { Decl::EnableStatistics(); @@ -137,6 +135,8 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { External->StartTranslationUnit(Consumer); if (!S.getDiagnostics().hasUnrecoverableErrorOccurred()) { // HLSL Change: Skip if fatal error already occurred + // HLSL Change - Support hierarchial time tracing. + llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); if (P.ParseTopLevelDecl(ADecl)) { if (!External && !S.getLangOpts().CPlusPlus) P.Diag(diag::ext_empty_translation_unit); @@ -151,10 +151,14 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { } } // HLSL Change: Skip if fatal error already occurred - // Process any TopLevelDecls generated by #pragma weak. - for (Decl *D : S.WeakTopLevelDecls()) - Consumer->HandleTopLevelDecl(DeclGroupRef(D)); - + { + // HLSL Change - Support hierarchial time tracing. 
+ llvm::TimeTraceScope TimeScope("Frontend - Consumer", StringRef("")); + // Process any TopLevelDecls generated by #pragma weak. + for (Decl *D : S.WeakTopLevelDecls()) + Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + } + // HLSL Change Starts // Provide the opportunity to generate translation-unit level validation // errors in the front-end, without relying on code generation being diff --git a/tools/clang/lib/Parse/ParseDecl.cpp b/tools/clang/lib/Parse/ParseDecl.cpp index 4ca80fcec6..59be41a484 100644 --- a/tools/clang/lib/Parse/ParseDecl.cpp +++ b/tools/clang/lib/Parse/ParseDecl.cpp @@ -3877,6 +3877,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -5321,6 +5322,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_shared: case tok::kw_groupshared: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_uniform: case tok::kw_in: case tok::kw_out: @@ -6125,6 +6127,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) { switch (Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseExpr.cpp b/tools/clang/lib/Parse/ParseExpr.cpp index 745b506468..8f51dd4b6c 100644 --- a/tools/clang/lib/Parse/ParseExpr.cpp +++ b/tools/clang/lib/Parse/ParseExpr.cpp @@ -795,6 +795,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: @@ -1740,6 +1741,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { switch (auto tk = Tok.getKind()) { case tok::kw_center: case tok::kw_globallycoherent: + case 
tok::kw_reordercoherent: case tok::kw_precise: case tok::kw_sample: case tok::kw_indices: diff --git a/tools/clang/lib/Parse/ParseStmt.cpp b/tools/clang/lib/Parse/ParseStmt.cpp index 95dea4ab2c..6fa33d7108 100644 --- a/tools/clang/lib/Parse/ParseStmt.cpp +++ b/tools/clang/lib/Parse/ParseStmt.cpp @@ -179,6 +179,7 @@ Parser::ParseStatementOrDeclarationAfterAttributes(StmtVector &Stmts, case tok::kw_precise: case tok::kw_sample: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_center: case tok::kw_indices: case tok::kw_vertices: diff --git a/tools/clang/lib/Parse/ParseTentative.cpp b/tools/clang/lib/Parse/ParseTentative.cpp index 29c6e49770..6bdef3a547 100644 --- a/tools/clang/lib/Parse/ParseTentative.cpp +++ b/tools/clang/lib/Parse/ParseTentative.cpp @@ -1275,6 +1275,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw_precise: case tok::kw_center: case tok::kw_globallycoherent: + case tok::kw_reordercoherent: case tok::kw_indices: case tok::kw_vertices: case tok::kw_primitives: diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index 492640c493..db140f4766 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" @@ -277,14 +280,20 @@ std::pair AlignmentSizeCalculator::getAlignmentAndSize( if (recordType != nullptr) { const llvm::StringRef name = recordType->getDecl()->getName(); - if (isTypeInVkNamespace(recordType) && name == "SpirvType") { - const ClassTemplateSpecializationDecl *templateDecl = - cast(recordType->getDecl()); - const uint64_t size = - templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); - const uint64_t alignment = - templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); - return {alignment, size}; + if (isTypeInVkNamespace(recordType)) { + if (name == "BufferPointer") { + return {8, 8}; // same as uint64_t + } + + if (name == "SpirvType") { + const ClassTemplateSpecializationDecl *templateDecl = + cast(recordType->getDecl()); + const uint64_t size = + templateDecl->getTemplateArgs()[1].getAsIntegral().getZExtValue(); + const uint64_t alignment = + templateDecl->getTemplateArgs()[2].getAsIntegral().getZExtValue(); + return {alignment, size}; + } } } diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 50a7ab0905..24dfdc2e9a 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" @@ -200,8 +203,10 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } // Pointer type else if (const auto *ptrType = dyn_cast(type)) { - addCapabilityForType(ptrType->getPointeeType(), loc, sc); - if (sc == spv::StorageClass::PhysicalStorageBuffer) { + addCapabilityForType(ptrType->getPointeeType(), loc, + ptrType->getStorageClass()); + if (ptrType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer) { addExtension(Extension::KHR_physical_storage_buffer, "SPV_KHR_physical_storage_buffer", loc); addCapability(spv::Capability::PhysicalStorageBufferAddresses); @@ -852,6 +857,12 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { spv::Capability::FragmentShaderShadingRateInterlockEXT, }); + addExtensionAndCapabilitiesIfEnabled( + Extension::KHR_compute_shader_derivatives, + { + spv::Capability::ComputeDerivativeGroupQuadsKHR, + spv::Capability::ComputeDerivativeGroupLinearKHR, + }); addExtensionAndCapabilitiesIfEnabled( Extension::NV_compute_shader_derivatives, { @@ -876,6 +887,9 @@ bool CapabilityVisitor::visit(SpirvModule *, Visitor::Phase phase) { addCapability(spv::Capability::InterpolationFunction); + addExtensionAndCapabilitiesIfEnabled(Extension::KHR_quad_control, + {spv::Capability::QuadControlKHR}); + return true; } diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp index fd0fa8a3d0..de73d5e417 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp @@ -860,7 +860,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl, QualType arrayType = astContext.getConstantArrayType( type, llvm::APInt(32, arraySize), clang::ArrayType::Normal, 0); - stageVarInstructions[cast(decl)] = + msOutIndicesBuiltin = getBuiltinVar(builtinID, arrayType, decl->getLocation()); } else { // 
For NV_mesh_shader, the built type is PrimitiveIndicesNV @@ -871,7 +871,7 @@ bool DeclResultIdMapper::createStageOutputVar(const DeclaratorDecl *decl, astContext.UnsignedIntTy, llvm::APInt(32, arraySize), clang::ArrayType::Normal, 0); - stageVarInstructions[cast(decl)] = + msOutIndicesBuiltin = getBuiltinVar(builtinID, arrayType, decl->getLocation()); } @@ -3522,7 +3522,8 @@ SpirvVariable *DeclResultIdMapper::createSpirvInterfaceVariable( // Decorate with PerPrimitiveNV for per-primitive out variables. spvBuilder.decoratePerPrimitiveNV(varInstr, varInstr->getSourceLocation()); - } else { + } else if (stageVar.getSemanticInfo().getKind() != + hlsl::Semantic::Kind::DomainLocation) { spvBuilder.decoratePatch(varInstr, varInstr->getSourceLocation()); } } diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.h b/tools/clang/lib/SPIRV/DeclResultIdMapper.h index 80723393ce..6ac17fde9d 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.h +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.h @@ -559,6 +559,11 @@ class DeclResultIdMapper { return value; } + SpirvVariable *getMSOutIndicesBuiltin() { + assert(msOutIndicesBuiltin && "Variable usage before decl parsing."); + return msOutIndicesBuiltin; + } + /// Decorate with spirv intrinsic attributes with lamda function variable /// check void decorateWithIntrinsicAttrs( @@ -1014,6 +1019,25 @@ class DeclResultIdMapper { /// creating that stage variable, so that we don't need to query them again /// for reading and writing. llvm::DenseMap stageVarInstructions; + + /// Special case for the Indices builtin: + /// - this builtin has a different layout in HLSL & SPIR-V, meaning it + /// requires + /// the same kind of handling as classic stageVarInstructions: + /// -> load into a HLSL compatible tmp + /// -> write back into the SPIR-V compatible layout. + /// - but the builtin is shared across invocations (not only lanes). + /// -> we must only write/read from the indices requested by the user. 
+ /// - the variable can be passed to other functions as a out param + /// -> we cannot copy-in/copy-out because shared across invocations. + /// -> we cannot pass a simple pointer: layout differences between + /// HLSL/SPIR-V. + /// + /// All this means we must keep track of the builtin, and each assignment to + /// this will have to handle the layout differences. The easiest solution is + /// to keep this builtin global to the module if present. + SpirvVariable *msOutIndicesBuiltin = nullptr; + /// Vector of all defined resource variables. llvm::SmallVector resourceVars; /// Mapping from {RW|Append|Consume}StructuredBuffers to their diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 6f6f5f88cd..eb00f59632 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. 
@@ -488,6 +491,7 @@ std::vector EmitVisitor::takeBinary() { debugVariableBinary.end()); result.insert(result.end(), annotationsBinary.begin(), annotationsBinary.end()); + result.insert(result.end(), fwdDeclBinary.begin(), fwdDeclBinary.end()); result.insert(result.end(), typeConstantBinary.begin(), typeConstantBinary.end()); result.insert(result.end(), globalVarsBinary.begin(), globalVarsBinary.end()); @@ -1016,6 +1020,28 @@ bool EmitVisitor::visit(SpirvConstantNull *inst) { return true; } +bool EmitVisitor::visit(SpirvConvertPtrToU *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPtr())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvConvertUToPtr *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getVal())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvUndef *inst) { typeHandler.getOrCreateUndef(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -1108,9 +1134,10 @@ bool EmitVisitor::visit(SpirvGroupNonUniformOp *inst) { initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, static_cast(inst->getExecutionScope())), - context.getUIntType(32), /* isSpecConst */ false)); + if (inst->hasExecutionScope()) + curInst.push_back(typeHandler.getOrCreateConstantInt( + llvm::APInt(32, static_cast(inst->getExecutionScope())), + context.getUIntType(32), /* isSpecConst */ false)); if (inst->hasGroupOp()) 
curInst.push_back(static_cast(inst->getGroupOp())); for (auto *operand : inst->getOperands()) @@ -2012,10 +2039,11 @@ void EmitTypeHandler::initTypeInstruction(spv::Op op) { curTypeInst.push_back(static_cast(op)); } -void EmitTypeHandler::finalizeTypeInstruction() { +void EmitTypeHandler::finalizeTypeInstruction(bool isFwdDecl) { curTypeInst[0] |= static_cast(curTypeInst.size()) << 16; - typeConstantBinary->insert(typeConstantBinary->end(), curTypeInst.begin(), - curTypeInst.end()); + auto binarySection = isFwdDecl ? fwdDeclBinary : typeConstantBinary; + binarySection->insert(binarySection->end(), curTypeInst.begin(), + curTypeInst.end()); } uint32_t EmitTypeHandler::getResultIdForType(const SpirvType *type, @@ -2594,6 +2622,17 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { curTypeInst.push_back(pointeeType); finalizeTypeInstruction(); } + // Forward pointer types + else if (const auto *fwdPtrType = dyn_cast(type)) { + const SpirvPointerType *ptrType = + context.getForwardReference(fwdPtrType->getPointeeType()); + const uint32_t refId = emitType(ptrType); + initTypeInstruction(spv::Op::OpTypeForwardPointer); + curTypeInst.push_back(refId); + curTypeInst.push_back(static_cast(ptrType->getStorageClass())); + finalizeTypeInstruction(true); + return refId; + } // Function types else if (const auto *fnType = dyn_cast(type)) { const uint32_t retTypeId = emitType(fnType->getReturnType()); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 2f5d99b89d..1f9b0939e6 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H @@ -49,15 +53,15 @@ class EmitTypeHandler { EmitTypeHandler(ASTContext &astCtx, SpirvContext &spvContext, const SpirvCodeGenOptions &opts, FeatureManager &featureMgr, std::vector *debugVec, - std::vector *decVec, + std::vector *decVec, std::vector *fwdVec, std::vector *typesVec, const std::function &takeNextIdFn) : astContext(astCtx), context(spvContext), featureManager(featureMgr), debugVariableBinary(debugVec), annotationsBinary(decVec), - typeConstantBinary(typesVec), takeNextIdFunction(takeNextIdFn), - emittedConstantInts({}), emittedConstantFloats({}), - emittedConstantComposites({}), emittedConstantNulls({}), - emittedUndef({}), emittedConstantBools() { + fwdDeclBinary(fwdVec), typeConstantBinary(typesVec), + takeNextIdFunction(takeNextIdFn), emittedConstantInts({}), + emittedConstantFloats({}), emittedConstantComposites({}), + emittedConstantNulls({}), emittedUndef({}), emittedConstantBools() { assert(decVec); assert(typesVec); } @@ -120,7 +124,7 @@ class EmitTypeHandler { private: void initTypeInstruction(spv::Op op); - void finalizeTypeInstruction(); + void finalizeTypeInstruction(bool isFwdDecl = false); // Returns the result-id for the given type and decorations. 
If a type with // the same decorations have already been used, it returns the existing @@ -161,6 +165,7 @@ class EmitTypeHandler { std::vector curDecorationInst; std::vector *debugVariableBinary; std::vector *annotationsBinary; + std::vector *fwdDeclBinary; std::vector *typeConstantBinary; std::function takeNextIdFunction; @@ -207,7 +212,7 @@ class EmitVisitor : public Visitor { : Visitor(opts, spvCtx), astContext(astCtx), featureManager(featureMgr), id(0), typeHandler(astCtx, spvCtx, opts, featureMgr, &debugVariableBinary, - &annotationsBinary, &typeConstantBinary, + &annotationsBinary, &fwdDeclBinary, &typeConstantBinary, [this]() -> uint32_t { return takeNextId(); }), debugMainFileId(0), debugInfoExtInstId(0), debugLineStart(0), debugLineEnd(0), debugColumnStart(0), debugColumnEnd(0), @@ -254,6 +259,8 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; bool visit(SpirvConstantNull *) override; + bool visit(SpirvConvertPtrToU *) override; + bool visit(SpirvConvertUToPtr *) override; bool visit(SpirvUndef *) override; bool visit(SpirvCompositeConstruct *) override; bool visit(SpirvCompositeExtract *) override; @@ -438,7 +445,9 @@ class EmitVisitor : public Visitor { // All annotation instructions: OpDecorate, OpMemberDecorate, OpGroupDecorate, // OpGroupMemberDecorate, and OpDecorationGroup. 
std::vector annotationsBinary; - // All type and constant instructions + // All forward pointer type declaration instructions + std::vector fwdDeclBinary; + // All other type and constant instructions std::vector typeConstantBinary; // All global variable declarations (all OpVariable instructions whose Storage // Class is not Function) diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index 2512984a4c..7fb449fee9 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -215,6 +215,8 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_physical_storage_buffer", Extension::KHR_physical_storage_buffer) .Case("SPV_KHR_vulkan_memory_model", Extension::KHR_vulkan_memory_model) + .Case("SPV_KHR_compute_shader_derivatives", + Extension::KHR_compute_shader_derivatives) .Case("SPV_NV_compute_shader_derivatives", Extension::NV_compute_shader_derivatives) .Case("SPV_KHR_fragment_shader_barycentric", @@ -224,6 +226,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_KHR_float_controls", Extension::KHR_float_controls) .Case("SPV_NV_shader_subgroup_partitioned", Extension::NV_shader_subgroup_partitioned) + .Case("SPV_KHR_quad_control", Extension::KHR_quad_control) .Default(Extension::Unknown); } @@ -283,6 +286,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_physical_storage_buffer"; case Extension::KHR_vulkan_memory_model: return "SPV_KHR_vulkan_memory_model"; + case Extension::KHR_compute_shader_derivatives: + return "SPV_KHR_compute_shader_derivatives"; case Extension::NV_compute_shader_derivatives: return "SPV_NV_compute_shader_derivatives"; case Extension::KHR_fragment_shader_barycentric: @@ -293,6 +298,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_KHR_float_controls"; case Extension::NV_shader_subgroup_partitioned: return 
"SPV_NV_shader_subgroup_partitioned"; + case Extension::KHR_quad_control: + return "SPV_KHR_quad_control"; default: break; } @@ -370,6 +377,10 @@ bool FeatureManager::enabledByDefault(Extension ext) { // KHR_ray_tracing and NV_ray_tracing are mutually exclusive so enable only // KHR extension by default case Extension::NV_ray_tracing: + return false; + // KHR_compute_shader_derivatives and NV_compute_shader_derivatives are + // mutually exclusive so enable only KHR extension by default + case Extension::NV_compute_shader_derivatives: return false; // Enabling EXT_demote_to_helper_invocation changes the code generation // behavior for the 'discard' statement. Therefore we will only enable it if @@ -405,5 +416,23 @@ bool FeatureManager::isTargetEnvVulkan1p3OrAbove() { return targetEnv >= SPV_ENV_VULKAN_1_3; } +bool FeatureManager::isTargetEnvVulkan() { + // This assert ensure that this list will be updated, if necessary, when + // a new target environment is added. + static_assert(SPV_ENV_VULKAN_1_4 + 1 == SPV_ENV_MAX); + + switch (targetEnv) { + case SPV_ENV_VULKAN_1_0: + case SPV_ENV_VULKAN_1_1: + case SPV_ENV_VULKAN_1_2: + case SPV_ENV_VULKAN_1_1_SPIRV_1_4: + case SPV_ENV_VULKAN_1_3: + case SPV_ENV_VULKAN_1_4: + return true; + default: + return false; + } +} + } // end namespace spirv } // end namespace clang diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 24cce9d89e..b31d19b5d8 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" @@ -549,7 +552,9 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type, // checking the general struct type. if (const auto *spvType = lowerResourceType(type, rule, isRowMajor, srcLoc)) { - spvContext.registerStructDeclForSpirvType(spvType, decl); + if (!isa(spvType)) { + spvContext.registerStructDeclForSpirvType(spvType, decl); + } return spvType; } @@ -809,6 +814,32 @@ const SpirvType *LowerTypeVisitor::lowerVkTypeInVkNamespace( QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); return lowerType(realType, rule, llvm::None, srcLoc); } + if (name == "BufferPointer") { + const size_t visitedTypeStackSize = visitedTypeStack.size(); + (void)visitedTypeStackSize; // suppress unused warning (used only in assert) + + for (QualType t : visitedTypeStack) { + if (t == type) { + return spvContext.getForwardPointerType(type); + } + } + + QualType realType = hlsl::GetHLSLResourceTemplateParamType(type); + if (rule == SpirvLayoutRule::Void) { + rule = spvOptions.sBufferLayoutRule; + } + visitedTypeStack.push_back(type); + + const SpirvType *spirvType = lowerType(realType, rule, llvm::None, srcLoc); + const auto *pointerType = spvContext.getPointerType( + spirvType, spv::StorageClass::PhysicalStorageBuffer); + spvContext.registerForwardReference(type, pointerType); + + assert(visitedTypeStack.back() == type); + visitedTypeStack.pop_back(); + assert(visitedTypeStack.size() == visitedTypeStackSize); + return pointerType; + } emitError("unknown type %0 in vk namespace", srcLoc) << type; return nullptr; } @@ -834,26 +865,6 @@ LowerTypeVisitor::lowerResourceType(QualType type, SpirvLayoutRule rule, // TODO: avoid string comparison once hlsl::IsHLSLResouceType() does that. - // Vulkan does not yet support true 16-bit float texture objexts. 
- if (name == "Buffer" || name == "RWBuffer" || name == "Texture1D" || - name == "Texture2D" || name == "Texture3D" || name == "TextureCube" || - name == "Texture1DArray" || name == "Texture2DArray" || - name == "Texture2DMS" || name == "Texture2DMSArray" || - name == "TextureCubeArray" || name == "RWTexture1D" || - name == "RWTexture2D" || name == "RWTexture3D" || - name == "RWTexture1DArray" || name == "RWTexture2DArray") { - const auto sampledType = hlsl::GetHLSLResourceResultType(type); - const auto loweredType = - lowerType(getElementType(astContext, sampledType), rule, - /*isRowMajor*/ llvm::None, srcLoc); - if (const auto *floatType = dyn_cast(loweredType)) { - if (floatType->getBitwidth() == 16) { - emitError("16-bit texture types not yet supported with -spirv", srcLoc); - return nullptr; - } - } - } - { // Texture types spv::Dim dim = {}; bool isArray = {}; diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 96235d1508..5b26b67e3a 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H @@ -137,6 +140,7 @@ class LowerTypeVisitor : public Visitor { AlignmentSizeCalculator alignmentCalc; /// alignment calculator bool useArrayForMat1xN; /// SPIR-V array for HLSL Matrix 1xN SpirvBuilder &spvBuilder; + SmallVector visitedTypeStack; // for type recursion detection }; } // end namespace spirv diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index b1e7388f16..689fc0715f 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" @@ -202,6 +205,14 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setLayoutRule(pointer->getLayoutRule()); instruction->setRValue(true); + if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + instruction->setAlignment(align); + } + if (pointer->containsAliasComponent() && isAKindOfStructuredOrByteBuffer(resultType)) { instruction->setStorageClass(spv::StorageClass::Uniform); @@ -300,6 +311,16 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, new (context) SpirvStore(loc, address, source, llvm::None, range); insertPoint->addInstruction(instruction); + if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && + address->getAstResultType() != QualType()) { // exclude raw 
buffer + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t align, size, stride; + std::tie(align, size) = alignmentCalc.getAlignmentAndSize( + address->getAstResultType(), address->getLayoutRule(), llvm::None, + &stride); + instruction->setAlignment(align); + } + if (address->isRasterizerOrdered()) { createEndInvocationInterlockEXT(loc, range); } @@ -432,7 +453,7 @@ SpirvSpecConstantBinaryOp *SpirvBuilder::createSpecConstantBinaryOp( } SpirvGroupNonUniformOp *SpirvBuilder::createGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope execScope, + spv::Op op, QualType resultType, llvm::Optional execScope, llvm::ArrayRef operands, SourceLocation loc, llvm::Optional groupOp) { assert(insertPoint && "null insert point"); @@ -491,6 +512,22 @@ SpirvImageTexelPointer *SpirvBuilder::createImageTexelPointer( return instruction; } +SpirvConvertPtrToU *SpirvBuilder::createConvertPtrToU(SpirvInstruction *ptr, + QualType type) { + auto *instruction = new (context) SpirvConvertPtrToU(ptr, type); + instruction->setRValue(true); + insertPoint->addInstruction(instruction); + return instruction; +} + +SpirvConvertUToPtr *SpirvBuilder::createConvertUToPtr(SpirvInstruction *val, + QualType type) { + auto *instruction = new (context) SpirvConvertUToPtr(val, type); + instruction->setRValue(false); + insertPoint->addInstruction(instruction); + return instruction; +} + spv::ImageOperandsMask SpirvBuilder::composeImageOperandsMask( SpirvInstruction *bias, SpirvInstruction *lod, const std::pair &grad, @@ -994,6 +1031,8 @@ SpirvInstruction *SpirvBuilder::createEmulatedBitFieldExtract( rightShift->setResultType(baseType); } + rightShift->setRValue(true); + return rightShift; } diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 6af36eb691..47dfc67433 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of 
Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// #include @@ -328,6 +331,29 @@ const HybridPointerType *SpirvContext::getPointerType(QualType pointee, return result; } +const ForwardPointerType * +SpirvContext::getForwardPointerType(QualType pointee) { + assert(hlsl::IsVKBufferPointerType(pointee)); + + auto foundPointee = forwardPointerTypes.find(pointee); + if (foundPointee != forwardPointerTypes.end()) { + return foundPointee->second; + } + + return forwardPointerTypes[pointee] = new (this) ForwardPointerType(pointee); +} + +const SpirvPointerType *SpirvContext::getForwardReference(QualType type) { + return forwardReferences[type]; +} + +void SpirvContext::registerForwardReference( + QualType type, const SpirvPointerType *pointerType) { + assert(pointerType->getStorageClass() == + spv::StorageClass::PhysicalStorageBuffer); + forwardReferences[type] = pointerType; +} + FunctionType * SpirvContext::getFunctionType(const SpirvType *ret, llvm::ArrayRef param) { diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 3a67257da7..cd5f860555 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits @@ -809,21 +813,17 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) { spvBuilder.setMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450); - // Even though the 'workQueue' grows due to the above loop, the first - // 'numEntryPoints' entries in the 'workQueue' are the ones with the HLSL - // 'shader' attribute, and must therefore be entry functions. - assert(numEntryPoints <= workQueue.size()); - - for (uint32_t i = 0; i < numEntryPoints; ++i) { + for (uint32_t i = 0; i < workQueue.size(); ++i) { // TODO: assign specific StageVars w.r.t. to entry point const FunctionInfo *entryInfo = workQueue[i]; - assert(entryInfo->isEntryFunction); - spvBuilder.addEntryPoint( - getSpirvShaderStage( - entryInfo->shaderModelKind, - featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), - entryInfo->entryFunction, getEntryPointName(entryInfo), - getInterfacesForEntryPoint(entryInfo->entryFunction)); + if (entryInfo->isEntryFunction) { + spvBuilder.addEntryPoint( + getSpirvShaderStage( + entryInfo->shaderModelKind, + featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)), + entryInfo->entryFunction, getEntryPointName(entryInfo), + getInterfacesForEntryPoint(entryInfo->entryFunction)); + } } // Add Location decorations to stage input/output variables. @@ -1237,12 +1237,17 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, } else if (isa(expr)) { assert(curThis); result = curThis; - } else if (isa(expr)) { + } else if (const auto *constructExpr = dyn_cast(expr)) { // For RayQuery type, we should not explicitly initialize it using // CXXConstructExpr e.g., RayQuery<0> r = RayQuery<0>() is the same as we do // not have a variable initialization. Setting nullptr for the SPIR-V // instruction used for expr will let us skip the variable initialization. 
- if (!hlsl::IsHLSLRayQueryType(expr->getType())) + if (hlsl::IsVKBufferPointerType(expr->getType())) { + const Expr *arg = constructExpr->getArg(0); + SpirvInstruction *value = loadIfGLValue(arg, arg->getSourceRange()); + result = spvBuilder.createConvertUToPtr(value, expr->getType()); + result->setRValue(); + } else if (!hlsl::IsHLSLRayQueryType(expr->getType())) result = curThis; } else if (const auto *unaryExpr = dyn_cast(expr)) { result = doUnaryExprOrTypeTraitExpr(unaryExpr); @@ -1547,7 +1552,23 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Create all parameters. for (uint32_t i = 0; i < decl->getNumParams(); ++i) { const ParmVarDecl *paramDecl = decl->getParamDecl(i); - (void)declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + QualType paramType = paramDecl->getType(); + auto *param = + declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(paramType)) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(paramType, &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + param, + static_cast(paramDecl->hasAttr() + ? 
spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + } +#endif } if (decl->hasBody()) { @@ -1648,6 +1669,15 @@ bool SpirvEmitter::validateVKAttributes(const NamedDecl *decl) { loc); success = false; } + +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(cast(decl)->getType())) { + emitError("vk::push_constant attribute cannot be used on declarations " + "with vk::BufferPointer type", + loc); + success = false; + } +#endif } // vk::shader_record_nv is supported only on cbuffer/ConstantBuffer @@ -1884,6 +1914,19 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { } } + if (featureManager.isTargetEnvVulkan() && + (isTexture(decl->getType()) || isRWTexture(decl->getType()) || + isBuffer(decl->getType()) || isRWBuffer(decl->getType()))) { + const auto sampledType = hlsl::GetHLSLResourceResultType(decl->getType()); + if (isFloatOrVecMatOfFloatType(sampledType) && + isOrContains16BitType(sampledType, spirvOptions.enable16BitTypes)) { + emitError("The sampled type for textures cannot be a floating point type " + "smaller than 32-bits when targeting a Vulkan environment.", + loc); + return; + } + } + if (decl->hasAttr()) { // This is a VarDecl for specialization constant. createSpecConstant(decl); @@ -1942,6 +1985,11 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { return; } + if (hlsl::IsVKBufferPointerType(decl->getType()) && !decl->hasInit()) { + emitError("vk::BufferPointer has no default constructor", loc); + return; + } + // We can have VarDecls inside cbuffer/tbuffer. For those VarDecls, we need // to emit their cbuffer/tbuffer as a whole and access each individual one // using access chains. 
@@ -2028,10 +2076,24 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { needsLegalization = true; } - if (var != nullptr && decl->hasAttrs()) { - declIdMapper.decorateWithIntrinsicAttrs(decl, var); - if (auto attr = decl->getAttr()) { - var->setStorageClass(static_cast(attr->getStclass())); + if (var != nullptr) { + Optional isRowMajor = llvm::None; + QualType desugaredType = desugarType(decl->getType(), &isRowMajor); + if (hlsl::IsVKBufferPointerType(desugaredType)) { + spvBuilder.decorateWithLiterals( + var, + static_cast(decl->hasAttr() + ? spv::Decoration::AliasedPointer + : spv::Decoration::RestrictPointer), + {}, loc); + } + + if (decl->hasAttrs()) { + declIdMapper.decorateWithIntrinsicAttrs(decl, var); + if (auto attr = decl->getAttr()) { + var->setStorageClass( + static_cast(attr->getStclass())); + } } } @@ -3104,12 +3166,6 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { argInfo && argInfo->getStorageClass() != spv::StorageClass::Function && isResourceType(paramType); - // HLSL requires that the parameters be copied in and out from temporaries. - // This looks for cases where the copy can be elided. To generate valid - // SPIR-V, the argument must be a memory declaration. - // - // - // If argInfo is nullptr and argInst is a rvalue, we do not have a proper // pointer to pass to the function. we need a temporary variable in that // case. @@ -3118,7 +3174,7 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) { // create a temporary variable for it because the function definition // expects are point-to-pointer argument for resources, which will be // resolved by legalization. 
- if ((argInfo || (argInst && argInst->getopcode() == spv::Op::OpVariable)) && + if ((argInfo || (argInst && !argInst->isRValue())) && canActAsOutParmVar(param) && !isArgGlobalVarWithResourceType && paramTypeMatchesArgType(paramType, arg->getType())) { // Based on SPIR-V spec, function parameter must be always Function @@ -3657,14 +3713,22 @@ SpirvInstruction *SpirvEmitter::doCastExpr(const CastExpr *expr, emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } + case CastKind::CK_ToVoid: + return nullptr; + case CastKind::CK_VK_BufferPointerToIntegral: { + return spvBuilder.createConvertPtrToU(doExpr(subExpr, range), toType); + } + case CastKind::CK_VK_IntegralToBufferPointer: { + return spvBuilder.createConvertUToPtr(doExpr(subExpr, range), toType); + } default: emitError("implicit cast kind '%0' unimplemented", expr->getExprLoc()) << expr->getCastKindName() << expr->getSourceRange(); expr->dump(); - return 0; + return nullptr; } } @@ -5437,6 +5501,8 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, case IntrinsicOp::MOP_WorldRayDirection: case IntrinsicOp::MOP_WorldRayOrigin: return processRayQueryIntrinsics(expr, opcode); + case IntrinsicOp::MOP_GetBufferContents: + return processIntrinsicGetBufferContents(expr); default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -7016,6 +7082,12 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, if (const auto *recordType = valType->getAs()) { assert(recordType->isStructureType()); + if (isTypeInVkNamespace(recordType) && + recordType->getDecl()->getName().equals("BufferPointer")) { + // Uniquely among structs, vk::BufferPointer lowers to a pointer type. 
+ return srcVal; + } + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, spvBuilder); const StructType *spirvStructType = @@ -8128,17 +8200,21 @@ void SpirvEmitter::assignToMSOutIndices( if (indices.size() > 1) { vecComponent = indices.back(); } - auto *var = declIdMapper.getStageVarInstruction(decl); - const auto *varTypeDecl = astContext.getAsConstantArrayType(decl->getType()); - QualType varType = varTypeDecl->getElementType(); + SpirvVariable *var = declIdMapper.getMSOutIndicesBuiltin(); + uint32_t numVertices = 1; - if (!isVectorType(varType, nullptr, &numVertices)) { - assert(isScalarType(varType)); - } - QualType valueType = value->getAstResultType(); uint32_t numValues = 1; - if (!isVectorType(valueType, nullptr, &numValues)) { - assert(isScalarType(valueType)); + { + const auto *varTypeDecl = + astContext.getAsConstantArrayType(decl->getType()); + QualType varType = varTypeDecl->getElementType(); + if (!isVectorType(varType, nullptr, &numVertices)) { + assert(isScalarType(varType)); + } + QualType valueType = value->getAstResultType(); + if (!isVectorType(valueType, nullptr, &numValues)) { + assert(isScalarType(valueType)); + } } const auto loc = decl->getLocation(); @@ -8185,7 +8261,10 @@ void SpirvEmitter::assignToMSOutIndices( assert(numValues == numVertices); if (extMesh) { // create accesschain for Primitive*IndicesEXT[vertIndex]. - auto *ptr = spvBuilder.createAccessChain(varType, var, vertIndex, loc); + const ConstantArrayType *CAT = + astContext.getAsConstantArrayType(var->getAstResultType()); + auto *ptr = spvBuilder.createAccessChain(CAT->getElementType(), var, + vertIndex, loc); // finally create store for Primitive*IndicesEXT[vertIndex] = value. 
spvBuilder.createStore(ptr, value, loc); } else { @@ -9192,6 +9271,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_QuadReadLaneAt: retVal = processWaveQuadWideShuffle(callExpr, hlslOpcode); break; + case hlsl::IntrinsicOp::IOP_QuadAny: + case hlsl::IntrinsicOp::IOP_QuadAll: + retVal = processWaveQuadAnyAll(callExpr, hlslOpcode); + break; case hlsl::IntrinsicOp::IOP_abort: case hlsl::IntrinsicOp::IOP_GetRenderTargetSampleCount: case hlsl::IntrinsicOp::IOP_GetRenderTargetSamplePosition: { @@ -9391,6 +9474,14 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_EvaluateAttributeSnapped: { retVal = processEvaluateAttributeAt(callExpr, hlslOpcode, srcLoc, srcRange); break; + } + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, false); + break; + } + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: { + retVal = processIntrinsicPointerCast(callExpr, true); + break; } INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); @@ -10146,6 +10237,53 @@ SpirvEmitter::processWaveQuadWideShuffle(const CallExpr *callExpr, opcode, retType, spv::Scope::Subgroup, {value, target}, srcLoc); } +SpirvInstruction *SpirvEmitter::processWaveQuadAnyAll(const CallExpr *callExpr, + hlsl::IntrinsicOp op) { + // Signatures: + // bool QuadAny(bool localValue) + // bool QuadAll(bool localValue) + assert(callExpr->getNumArgs() == 1); + assert(op == hlsl::IntrinsicOp::IOP_QuadAny || + op == hlsl::IntrinsicOp::IOP_QuadAll); + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_1, "Wave Operation", + callExpr->getExprLoc()); + + auto *predicate = doExpr(callExpr->getArg(0)); + const auto srcLoc = callExpr->getExprLoc(); + + if (!featureManager.isExtensionEnabled(Extension::KHR_quad_control)) { + // We can't use QuadAny/QuadAll, so implement them using QuadSwap. 
We + // will read the value at each quad invocation, then combine them. + + spv::Op reducer = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpLogicalOr + : spv::Op::OpLogicalAnd; + + SpirvInstruction *result = predicate; + + for (size_t i = 0; i < 3; i++) { + SpirvInstruction *invocationValue = spvBuilder.createGroupNonUniformOp( + spv::Op::OpGroupNonUniformQuadSwap, astContext.BoolTy, + spv::Scope::Subgroup, + {predicate, spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, i))}, + srcLoc); + result = spvBuilder.createBinaryOp(reducer, astContext.BoolTy, result, + invocationValue, srcLoc); + } + + return result; + } + + spv::Op opcode = op == hlsl::IntrinsicOp::IOP_QuadAny + ? spv::Op::OpGroupNonUniformQuadAnyKHR + : spv::Op::OpGroupNonUniformQuadAllKHR; + + return spvBuilder.createGroupNonUniformOp(opcode, astContext.BoolTy, + llvm::Optional(), + {predicate}, srcLoc); +} + SpirvInstruction * SpirvEmitter::processWaveActiveAllEqual(const CallExpr *callExpr) { assert(callExpr->getNumArgs() == 1); @@ -10770,6 +10908,56 @@ SpirvEmitter::processIntrinsicClamp(const CallExpr *callExpr) { loc, range); } +SpirvInstruction * +SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, + bool isStatic) { + const Expr *argExpr = callExpr->getArg(0); + SpirvInstruction *ptr = doExpr(argExpr); + QualType srcType = argExpr->getType(); + QualType destType = callExpr->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + return srcTypeArg == destTypeArg + ? 
ptr + : spvBuilder.createUnaryOp(spv::Op::OpBitcast, destType, ptr, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( + const CXXMemberCallExpr *callExpr) { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + Expr *obj = callExpr->getImplicitObjectArgument(); + SpirvInstruction *bufferPointer = doExpr(obj); + if (!bufferPointer) + return nullptr; + if (bufferPointer->isRValue()) { + bufferPointer->setRValue(false); + bufferPointer->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); + return bufferPointer; + } + + unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); + lowerTypeVisitor.visitInstruction(bufferPointer); + + const SpirvPointerType *bufferPointerType = + dyn_cast(bufferPointer->getResultType()); + SpirvLoad *retVal = + spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, + callExpr->getLocStart()); + if (!align) { + QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + bufferType, retVal->getLayoutRule(), llvm::None, &stride); + } + retVal->setAlignment(align); + retVal->setRValue(false); + return retVal; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, @@ -15044,6 +15232,10 @@ void SpirvEmitter::addDerivativeGroupExecutionMode() { // to 2D quad rules. Using derivative operations in any numthreads // configuration not matching either of these is invalid and will produce an // error. 
+ static_assert(spv::ExecutionMode::DerivativeGroupQuadsNV == + spv::ExecutionMode::DerivativeGroupQuadsKHR); + static_assert(spv::ExecutionMode::DerivativeGroupLinearNV == + spv::ExecutionMode::DerivativeGroupLinearKHR); spv::ExecutionMode em = spv::ExecutionMode::DerivativeGroupQuadsNV; if (numThreads[0] % 4 == 0 && numThreads[1] == 1 && numThreads[2] == 1) { em = spv::ExecutionMode::DerivativeGroupLinearNV; diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index eca038527f..79d2c43c35 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits @@ -491,6 +495,15 @@ class SpirvEmitter : public ASTConsumer { /// Processes the 'lit' intrinsic function. SpirvInstruction *processIntrinsicLit(const CallExpr *); + /// Processes the 'vk::static_pointer_cast' and 'vk_reinterpret_pointer_cast' + /// intrinsic functions. + SpirvInstruction *processIntrinsicPointerCast(const CallExpr *, + bool isStatic); + + /// Processes the vk::BufferPointer intrinsic function 'Get'. + SpirvInstruction * + processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic @@ -657,6 +670,10 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op); + /// Processes SM6.7 quad any/all. 
+ SpirvInstruction *processWaveQuadAnyAll(const CallExpr *, + hlsl::IntrinsicOp op); + /// Generates the Spir-V instructions needed to implement the given call to /// WaveActiveAllEqual. Returns a pointer to the instruction that produces the /// final result. diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 21aada9e82..f41de03adc 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -4,6 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. +// +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. @@ -57,6 +61,8 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUndef) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeConstruct) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvCompositeExtract) @@ -620,6 +626,28 @@ bool SpirvConstantNull::operator==(const SpirvConstantNull &that) const { astResultType == that.astResultType; } +SpirvConvertPtrToU::SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertPtrToU, spv::Op::OpConvertPtrToU, type, loc, + range), + ptr(ptr) {} + +bool SpirvConvertPtrToU::operator==(const SpirvConvertPtrToU &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && ptr == that.ptr; +} + +SpirvConvertUToPtr::SpirvConvertUToPtr(SpirvInstruction 
*val, QualType type, + SourceLocation loc, SourceRange range) + : SpirvInstruction(IK_ConvertUToPtr, spv::Op::OpConvertUToPtr, type, loc, + range), + val(val) {} + +bool SpirvConvertUToPtr::operator==(const SpirvConvertUToPtr &that) const { + return opcode == that.opcode && resultType == that.resultType && + astResultType == that.astResultType && val == that.val; +} + SpirvUndef::SpirvUndef(QualType type) : SpirvInstruction(IK_Undef, spv::Op::OpUndef, type, /*SourceLocation*/ {}) {} @@ -677,7 +705,7 @@ SpirvFunctionCall::SpirvFunctionCall(QualType resultType, SourceLocation loc, function(fn), args(argsVec.begin(), argsVec.end()) {} SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( - spv::Op op, QualType resultType, spv::Scope scope, + spv::Op op, QualType resultType, llvm::Optional scope, llvm::ArrayRef operandsVec, SourceLocation loc, llvm::Optional group) : SpirvInstruction(IK_GroupNonUniformOp, op, resultType, loc), @@ -709,6 +737,8 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( case spv::Op::OpGroupNonUniformLogicalAnd: case spv::Op::OpGroupNonUniformLogicalOr: case spv::Op::OpGroupNonUniformLogicalXor: + case spv::Op::OpGroupNonUniformQuadAnyKHR: + case spv::Op::OpGroupNonUniformQuadAllKHR: assert(operandsVec.size() == 1); break; @@ -740,6 +770,11 @@ SpirvGroupNonUniformOp::SpirvGroupNonUniformOp( assert(false && "Unexpected Group non-uniform opcode"); break; } + + if (op != spv::Op::OpGroupNonUniformQuadAnyKHR && + op != spv::Op::OpGroupNonUniformQuadAllKHR) { + assert(scope.hasValue()); + } } SpirvImageOp::SpirvImageOp( diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index 10668dc388..f5a864e2b6 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. 
+// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including @@ -1543,6 +1546,20 @@ TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, if (InitSeq.isConstructorInitialization()) Kind = CK_ConstructorConversion; +#ifdef ENABLE_SPIRV_CODEGEN + // Special cases for vk::BufferPointer. + else if (hlsl::IsVKBufferPointerType(SrcExpr.get()->getType()) && + DestType->isIntegerType() && CCK == Sema::CCK_CStyleCast) { + Kind = CK_VK_BufferPointerToIntegral; + SrcExpr = Result; + return TC_Success; + } else if (hlsl::IsVKBufferPointerType(DestType) && + SrcExpr.get()->getType()->isIntegerType()) { + Kind = CK_VK_IntegralToBufferPointer; + SrcExpr = Result; + return TC_Success; + } +#endif else Kind = CK_NoOp; diff --git a/tools/clang/lib/Sema/SemaChecking.cpp b/tools/clang/lib/Sema/SemaChecking.cpp index 2fde458499..9e64732336 100644 --- a/tools/clang/lib/Sema/SemaChecking.cpp +++ b/tools/clang/lib/Sema/SemaChecking.cpp @@ -6772,8 +6772,8 @@ static void AnalyzeAssignment(Sema &S, BinaryOperator *E) { // Just recurse on the LHS. AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc()); - S.DiagnoseGloballyCoherentMismatch(E->getRHS(), E->getLHS()->getType(), - E->getOperatorLoc()); + S.DiagnoseCoherenceMismatch(E->getRHS(), E->getLHS()->getType(), + E->getOperatorLoc()); // We want to recurse on the RHS as normal unless we're assigning to // a bitfield. 
@@ -6887,7 +6887,7 @@ void CheckImplicitArgumentConversions(Sema &S, CallExpr *TheCall, ++ArgIdx, ++ParmIdx) { ParmVarDecl *PD = FD->getParamDecl(ParmIdx); Expr *CurrA = TheCall->getArg(ArgIdx); - S.DiagnoseGloballyCoherentMismatch(CurrA, PD->getType(), CC); + S.DiagnoseCoherenceMismatch(CurrA, PD->getType(), CC); } } // HLSL CHange End diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 6d838fb203..36ab55ea10 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,6 +810,13 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } + if (ContainsLongVector(Payload->getType())) { + const unsigned PayloadParametersIdx = 10; + S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) + << PayloadParametersIdx; + return; + } + CollectNonAccessableFields(PayloadType, CallerStage, {}, {}, NonWriteableFields, NonReadableFields); diff --git a/tools/clang/lib/Sema/SemaDecl.cpp b/tools/clang/lib/Sema/SemaDecl.cpp index 06bdeb491a..e09bf4623c 100644 --- a/tools/clang/lib/Sema/SemaDecl.cpp +++ b/tools/clang/lib/Sema/SemaDecl.cpp @@ -9167,9 +9167,10 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, // HLSL Change begin // When initializing an HLSL resource type we should diagnose mismatches in - // globally coherent annotations _unless_ the source is a dynamic resource - // placeholder type where we safely infer the globallycoherent annotaiton. - DiagnoseGloballyCoherentMismatch(Init, DclT, Init->getExprLoc()); + // globally and reorder coherent annotations _unless_ the source is a dynamic + // resource placeholder type where we safely infer the coherence + // annotations. 
+ DiagnoseCoherenceMismatch(Init, DclT, Init->getExprLoc()); // HLSL Change end // Expressions default to 'id' when we're in a debugger diff --git a/tools/clang/lib/Sema/SemaDeclAttr.cpp b/tools/clang/lib/Sema/SemaDeclAttr.cpp index 723900cd07..085874a0ed 100644 --- a/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -5105,6 +5105,17 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D, for (const AttributeList* l = AttrList; l; l = l->getNext()) ProcessDeclAttribute(*this, S, D, *l, IncludeCXX11Attributes); + // HLSL Change Starts - Warn of redundant reorder / globally coherent + // attributes + if (D->hasAttr() && + D->hasAttr()) { + Diag(AttrList->getLoc(), diag::warn_hlsl_gc_implies_rc_attribute) + << cast(D); + D->dropAttr(); + return; + } + // HLSL Change Ends + // FIXME: We should be able to handle these cases in TableGen. // GCC accepts // static int a9 __attribute__((weakref)); diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index c8c762a0a1..507b6a7508 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -2787,13 +2787,18 @@ bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS, // Never if a scope specifier was provided. if (SS.isSet()) { // HLSL Change begins - // We want to be able to have intrinsics inside the "vk" namespace. + // We want to be able to have intrinsics inside the "vk" and "dx" + // namespaces. 
const bool isVkNamespace = SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && SS.getScopeRep()->getAsNamespace()->getName() == "vk"; - if (!isVkNamespace) - // HLSL Change ends + const bool isDxNamespace = + SS.getScopeRep() && SS.getScopeRep()->getAsNamespace() && + SS.getScopeRep()->getAsNamespace()->getName() == "dx"; + + if (!isVkNamespace && !isDxNamespace) + // HLSL Change ends return false; } diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index f46bb0ad9f..5113c56205 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,6 +5,9 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// //===----------------------------------------------------------------------===// /// /// \file @@ -1052,6 +1055,56 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, // corresponding cast expression. 
if (Exprs.size() == 1 && !ListInitialization) { Expr *Arg = Exprs[0]; +#ifdef ENABLE_SPIRV_CODEGEN + if (hlsl::IsVKBufferPointerType(Ty) && Arg->getType()->isIntegerType()) { + typedef DeclContext::specific_decl_iterator ft_iter; + auto *recordDecl = Ty->getAsCXXRecordDecl(); + auto *specDecl = cast(recordDecl); + auto *templatedDecl = + specDecl->getSpecializedTemplate()->getTemplatedDecl(); + auto functionTemplateDecls = + llvm::iterator_range(ft_iter(templatedDecl->decls_begin()), + ft_iter(templatedDecl->decls_end())); + for (auto *ftd : functionTemplateDecls) { + auto *fd = ftd->getTemplatedDecl(); + if (fd->getNumParams() != 1 || + !fd->getParamDecl(0)->getType()->isIntegerType()) + continue; + + void *insertPos; + auto templateArgs = ftd->getInjectedTemplateArgs(); + auto *functionDecl = ftd->findSpecialization(templateArgs, insertPos); + if (!functionDecl) { + DeclarationNameInfo DInfo(ftd->getDeclName(), + recordDecl->getLocation()); + auto *templateArgList = TemplateArgumentList::CreateCopy( + Context, templateArgs.data(), templateArgs.size()); + functionDecl = CXXConstructorDecl::Create( + Context, recordDecl, Arg->getLocStart(), DInfo, Ty, TInfo, false, + false, false, false); + functionDecl->setFunctionTemplateSpecialization(ftd, templateArgList, + insertPos); + } else if (functionDecl->getDeclKind() != Decl::Kind::CXXConstructor) { + continue; + } + + CanQualType argType = Arg->getType()->getCanonicalTypeUnqualified(); + if (!Arg->isRValue()) { + Arg = ImpCastExprToType(Arg, argType, CK_LValueToRValue).get(); + } + if (argType != Context.UnsignedLongLongTy) { + Arg = ImpCastExprToType(Arg, Context.UnsignedLongLongTy, + CK_IntegralCast) + .get(); + } + return CXXConstructExpr::Create( + Context, Ty, TyBeginLoc, cast(functionDecl), + false, {Arg}, false, false, false, false, + CXXConstructExpr::ConstructionKind::CK_Complete, + SourceRange(LParenLoc, RParenLoc)); + } + } +#endif return BuildCXXFunctionalCastExpr(TInfo, LParenLoc, Arg, RParenLoc); } diff 
--git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fb3937cfd5..418425a468 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,6 +6,9 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // +// All rights reserved. // +// // // This file implements the semantic support for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// @@ -14,6 +17,7 @@ #include "VkConstantsTables.h" #include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" @@ -31,6 +35,8 @@ #include "clang/AST/HlslTypes.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/Specifiers.h" +#include "clang/Parse/ParseDiagnostic.h" #include "clang/Sema/ExternalSemaSource.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" @@ -40,6 +46,7 @@ #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -191,6 +198,7 @@ enum ArBasicKind { AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -243,6 +251,9 @@ enum ArBasicKind { AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT, + AR_BASIC_MAXIMUM_COUNT }; @@ -363,6 +374,8 @@ enum ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) + #define 
IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) #define IS_BPROP_TEXTURE(_Props) (((_Props)&BPROP_TEXTURE) != 0) @@ -541,6 +554,7 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT, // AR_OBJECT_VK_LITERAL, BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE use recordType BPROP_OBJECT, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID use recordType + BPROP_OBJECT, // AR_OBJECT_VK_BUFFER_POINTER use recordType #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -566,9 +580,9 @@ const UINT g_uBasicKindProps[] = { 0, // AR_OBJECT_PROCEDURAL_PRIMITIVE_HIT_GROUP, 0, // AR_OBJECT_RAYTRACING_PIPELINE_CONFIG1, - BPROP_OBJECT, // AR_OBJECT_RAY_QUERY, - BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, - BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, + LICOMPTYPE_RAY_QUERY, // AR_OBJECT_RAY_QUERY, + BPROP_OBJECT, // AR_OBJECT_HEAP_RESOURCE, + BPROP_OBJECT, // AR_OBJECT_HEAP_SAMPLER, BPROP_OBJECT | BPROP_RWBUFFER | BPROP_TEXTURE, // AR_OBJECT_RWTEXTURE2DMS BPROP_OBJECT | BPROP_RWBUFFER | @@ -591,6 +605,9 @@ const UINT g_uBasicKindProps[] = { BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // Shader Execution Reordering + LICOMPTYPE_HIT_OBJECT, // AR_OBJECT_HIT_OBJECT, + // AR_BASIC_MAXIMUM_COUNT }; @@ -616,6 +633,8 @@ C_ASSERT(ARRAYSIZE(g_uBasicKindProps) == AR_BASIC_MAXIMUM_COUNT); #define IS_BASIC_STREAM(_Kind) IS_BPROP_STREAM(GetBasicKindProps(_Kind)) +#define IS_BASIC_PATCH(_Kind) IS_BPROP_PATCH(GetBasicKindProps(_Kind)) + #define IS_BASIC_SAMPLER(_Kind) IS_BPROP_SAMPLER(GetBasicKindProps(_Kind)) #define IS_BASIC_TEXTURE(_Kind) IS_BPROP_TEXTURE(GetBasicKindProps(_Kind)) #define IS_BASIC_OBJECT(_Kind) IS_BPROP_OBJECT(GetBasicKindProps(_Kind)) @@ -1116,6 +1135,9 @@ static const ArBasicKind g_ResourceCT[] = {AR_OBJECT_HEAP_RESOURCE, static const ArBasicKind g_RayDescCT[] = {AR_OBJECT_RAY_DESC, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_RayQueryCT[] = {AR_OBJECT_RAY_QUERY, + 
AR_BASIC_UNKNOWN}; + static const ArBasicKind g_AccelerationStructCT[] = { AR_OBJECT_ACCELERATION_STRUCT, AR_BASIC_UNKNOWN}; @@ -1214,6 +1236,15 @@ static const ArBasicKind g_AnyOutputRecordCT[] = { AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_BASIC_UNKNOWN}; +// Shader Execution Reordering +static const ArBasicKind g_DxHitObjectCT[] = {AR_OBJECT_HIT_OBJECT, + AR_BASIC_UNKNOWN}; + +#ifdef ENABLE_SPIRV_CODEGEN +static const ArBasicKind g_VKBufferPointerCT[] = {AR_OBJECT_VK_BUFFER_POINTER, + AR_BASIC_UNKNOWN}; +#endif + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_NullCT, // LICOMPTYPE_VOID @@ -1268,6 +1299,11 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_AnyOutputRecordCT, // LICOMPTYPE_ANY_NODE_OUTPUT_RECORD g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS + g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT + g_RayQueryCT, // LICOMPTYPE_RAY_QUERY +#ifdef ENABLE_SPIRV_CODEGEN + g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER +#endif }; static_assert( ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, @@ -1326,6 +1362,7 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_VK_SPIRV_TYPE, AR_OBJECT_VK_SPIRV_OPAQUE_TYPE, AR_OBJECT_VK_INTEGRAL_CONSTANT, AR_OBJECT_VK_LITERAL, AR_OBJECT_VK_SPV_INTRINSIC_TYPE, AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID, + AR_OBJECT_VK_BUFFER_POINTER, #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1356,7 +1393,10 @@ static const ArBasicKind g_ArBasicKindsAsTypes[] = { AR_OBJECT_NODE_OUTPUT, AR_OBJECT_EMPTY_NODE_OUTPUT, AR_OBJECT_NODE_OUTPUT_ARRAY, AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY, - AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS}; + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + + // Shader Execution Reordering + AR_OBJECT_HIT_OBJECT}; // Count of template arguments 
for basic kind of objects that look like // templates (one or more type arguments). @@ -1429,6 +1469,7 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_VK_LITERAL, 1, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE 1, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + 2, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1472,6 +1513,9 @@ static const uint8_t g_ArBasicKindsTemplateCount[] = { 1, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, 1, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + 0, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == @@ -1574,6 +1618,7 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_LITERAL, {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_TYPE {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID + {0, MipsFalse, SampleFalse}, // AR_OBJECT_VK_BUFFER_POINTER #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends @@ -1618,76 +1663,177 @@ static const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { {1, MipsFalse, SampleFalse}, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS {1, MipsFalse, SampleFalse}, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS + + // Shader Execution Reordering + {0, MipsFalse, SampleFalse}, // AR_OBJECT_HIT_OBJECT, }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsSubscripts)); // Type names for ArBasicKind values. 
static const char *g_ArBasicTypeNames[] = { - "bool", "float", "half", "half", "float", "double", "int", "sbyte", "byte", - "short", "ushort", "int", "uint", "long", "ulong", "min10float", - "min16float", "min12int", "min16int", "min16uint", "int8_t4_packed", - "uint8_t4_packed", "enum", - - "", "", "", "", "", "", + "bool", + "float", + "half", + "half", + "float", + "double", + "int", + "sbyte", + "byte", + "short", + "ushort", + "int", + "uint", + "long", + "ulong", + "min10float", + "min16float", + "min12int", + "min16int", + "min16uint", + "int8_t4_packed", + "uint8_t4_packed", + "enum", + + "", + "", + "", + "", + "", + "", "enum class", - "null", "literal string", "string", + "null", + "literal string", + "string", // "texture", - "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", "Texture3D", - "TextureCube", "TextureCubeArray", "Texture2DMS", "Texture2DMSArray", - "SamplerState", "sampler1D", "sampler2D", "sampler3D", "samplerCUBE", - "SamplerComparisonState", "Buffer", "RenderTargetView", "DepthStencilView", - "ComputeShader", "DomainShader", "GeometryShader", "HullShader", - "PixelShader", "VertexShader", "pixelfragment", "vertexfragment", - "StateBlock", "Rasterizer", "DepthStencil", "Blend", "PointStream", - "LineStream", "TriangleStream", "InputPatch", "OutputPatch", "RWTexture1D", - "RWTexture1DArray", "RWTexture2D", "RWTexture2DArray", "RWTexture3D", - "RWBuffer", "ByteAddressBuffer", "RWByteAddressBuffer", "StructuredBuffer", - "RWStructuredBuffer", "RWStructuredBuffer(Incrementable)", - "RWStructuredBuffer(Decrementable)", "AppendStructuredBuffer", + "Texture1D", + "Texture1DArray", + "Texture2D", + "Texture2DArray", + "Texture3D", + "TextureCube", + "TextureCubeArray", + "Texture2DMS", + "Texture2DMSArray", + "SamplerState", + "sampler1D", + "sampler2D", + "sampler3D", + "samplerCUBE", + "SamplerComparisonState", + "Buffer", + "RenderTargetView", + "DepthStencilView", + "ComputeShader", + "DomainShader", + "GeometryShader", + 
"HullShader", + "PixelShader", + "VertexShader", + "pixelfragment", + "vertexfragment", + "StateBlock", + "Rasterizer", + "DepthStencil", + "Blend", + "PointStream", + "LineStream", + "TriangleStream", + "InputPatch", + "OutputPatch", + "RWTexture1D", + "RWTexture1DArray", + "RWTexture2D", + "RWTexture2DArray", + "RWTexture3D", + "RWBuffer", + "ByteAddressBuffer", + "RWByteAddressBuffer", + "StructuredBuffer", + "RWStructuredBuffer", + "RWStructuredBuffer(Incrementable)", + "RWStructuredBuffer(Decrementable)", + "AppendStructuredBuffer", "ConsumeStructuredBuffer", - "ConstantBuffer", "TextureBuffer", + "ConstantBuffer", + "TextureBuffer", - "RasterizerOrderedBuffer", "RasterizerOrderedByteAddressBuffer", - "RasterizerOrderedStructuredBuffer", "RasterizerOrderedTexture1D", - "RasterizerOrderedTexture1DArray", "RasterizerOrderedTexture2D", - "RasterizerOrderedTexture2DArray", "RasterizerOrderedTexture3D", + "RasterizerOrderedBuffer", + "RasterizerOrderedByteAddressBuffer", + "RasterizerOrderedStructuredBuffer", + "RasterizerOrderedTexture1D", + "RasterizerOrderedTexture1DArray", + "RasterizerOrderedTexture2D", + "RasterizerOrderedTexture2DArray", + "RasterizerOrderedTexture3D", - "FeedbackTexture2D", "FeedbackTexture2DArray", + "FeedbackTexture2D", + "FeedbackTexture2DArray", // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN - "SubpassInput", "SubpassInputMS", "SpirvType", "SpirvOpaqueType", - "integral_constant", "Literal", "ext_type", "ext_result_id", + "SubpassInput", + "SubpassInputMS", + "SpirvType", + "SpirvOpaqueType", + "integral_constant", + "Literal", + "ext_type", + "ext_result_id", + "BufferPointer", #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends "", - "deprecated effect object", "wave_t", "RayDesc", - "RaytracingAccelerationStructure", "user defined type", + "deprecated effect object", + "wave_t", + "RayDesc", + "RaytracingAccelerationStructure", + "user defined type", "BuiltInTriangleIntersectionAttributes", // subobjects - "StateObjectConfig", 
"GlobalRootSignature", "LocalRootSignature", - "SubobjectToExportsAssociation", "RaytracingShaderConfig", - "RaytracingPipelineConfig", "TriangleHitGroup", - "ProceduralPrimitiveHitGroup", "RaytracingPipelineConfig1", - - "RayQuery", "HEAP_Resource", "HEAP_Sampler", - - "RWTexture2DMS", "RWTexture2DMSArray", + "StateObjectConfig", + "GlobalRootSignature", + "LocalRootSignature", + "SubobjectToExportsAssociation", + "RaytracingShaderConfig", + "RaytracingPipelineConfig", + "TriangleHitGroup", + "ProceduralPrimitiveHitGroup", + "RaytracingPipelineConfig1", + + "RayQuery", + "HEAP_Resource", + "HEAP_Sampler", + + "RWTexture2DMS", + "RWTexture2DMSArray", // Workgraphs - "EmptyNodeInput", "DispatchNodeInputRecord", "RWDispatchNodeInputRecord", - "GroupNodeInputRecords", "RWGroupNodeInputRecords", "ThreadNodeInputRecord", + "EmptyNodeInput", + "DispatchNodeInputRecord", + "RWDispatchNodeInputRecord", + "GroupNodeInputRecords", + "RWGroupNodeInputRecords", + "ThreadNodeInputRecord", "RWThreadNodeInputRecord", - "NodeOutput", "EmptyNodeOutput", "NodeOutputArray", "EmptyNodeOutputArray", + "NodeOutput", + "EmptyNodeOutput", + "NodeOutputArray", + "EmptyNodeOutputArray", - "ThreadNodeOutputRecords", "GroupNodeOutputRecords"}; + "ThreadNodeOutputRecords", + "GroupNodeOutputRecords", + + // Shader Execution Reordering + "HitObject", +}; C_ASSERT(_countof(g_ArBasicTypeNames) == AR_BASIC_MAXIMUM_COUNT); @@ -1727,6 +1873,10 @@ static const char *g_DeprecatedEffectObjectNames[] = { "RenderTargetView", // 16 }; +static bool IsStaticMember(const HLSL_INTRINSIC *fn) { + return fn->Flags & INTRIN_FLAG_STATIC_MEMBER; +} + static bool IsVariadicIntrinsicFunction(const HLSL_INTRINSIC *fn) { return fn->pArgs[fn->uNumArgs - 1].uTemplateId == INTRIN_TEMPLATE_VARARGS; } @@ -1806,12 +1956,19 @@ static void AddHLSLIntrinsicAttr(FunctionDecl *FD, ASTContext &context, } FD->addAttr( HLSLIntrinsicAttr::CreateImplicit(context, tableName, lowering, opcode)); - if (pIntrinsic->bReadNone) + if 
(pIntrinsic->Flags & INTRIN_FLAG_READ_NONE) FD->addAttr(ConstAttr::CreateImplicit(context)); - if (pIntrinsic->bReadOnly) + if (pIntrinsic->Flags & INTRIN_FLAG_READ_ONLY) FD->addAttr(PureAttr::CreateImplicit(context)); - if (pIntrinsic->bIsWave) + if (pIntrinsic->Flags & INTRIN_FLAG_IS_WAVE) FD->addAttr(HLSLWaveSensitiveAttr::CreateImplicit(context)); + if (pIntrinsic->MinShaderModel) { + unsigned Major = pIntrinsic->MinShaderModel >> 4; + unsigned Minor = pIntrinsic->MinShaderModel & 0xF; + FD->addAttr(AvailabilityAttr::CreateImplicit( + context, &context.Idents.get(""), clang::VersionTuple(Major, Minor), + clang::VersionTuple(), clang::VersionTuple(), false, "")); + } } static FunctionDecl * @@ -1857,12 +2014,14 @@ AddHLSLIntrinsicFunction(ASTContext &context, NamespaceDecl *NS, const QualType fnReturnType = functionArgQualTypes[0]; std::vector fnArgTypes(functionArgQualTypes.begin() + 1, functionArgQualTypes.end()); + + StorageClass SC = IsStaticMember(pIntrinsic) ? SC_Static : SC_Extern; QualType functionType = context.getFunctionType(fnReturnType, fnArgTypes, protoInfo, paramMods); FunctionDecl *functionDecl = FunctionDecl::Create( context, currentDeclContext, NoLoc, - DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, - StorageClass::SC_Extern, InlineSpecifiedFalse, HasWrittenPrototypeTrue); + DeclarationNameInfo(functionName, NoLoc), functionType, nullptr, SC, + InlineSpecifiedFalse, HasWrittenPrototypeTrue); currentDeclContext->addDecl(functionDecl); functionDecl->setLexicalDeclContext(currentDeclContext); @@ -2271,6 +2430,10 @@ static void GetIntrinsicMethods(ArBasicKind kind, *intrinsics = g_RayQueryMethods; *intrinsicCount = _countof(g_RayQueryMethods); break; + case AR_OBJECT_HIT_OBJECT: + *intrinsics = g_DxHitObjectMethods; + *intrinsicCount = _countof(g_DxHitObjectMethods); + break; case AR_OBJECT_RWTEXTURE2DMS: *intrinsics = g_RWTexture2DMSMethods; *intrinsicCount = _countof(g_RWTexture2DMSMethods); @@ -2643,13 +2806,17 @@ 
AddBuiltInTriangleIntersectionAttributes(ASTContext &context, // // Subobjects -static CXXRecordDecl *StartSubobjectDecl(ASTContext &context, - const char *name) { +static CXXRecordDecl * +StartSubobjectDecl(ASTContext &context, const char *name, + DXIL::SubobjectKind Kind, + DXIL::HitGroupType HGT = DXIL::HitGroupType::LastEntry) { IdentifierInfo &id = context.Idents.get(StringRef(name), tok::TokenKind::identifier); CXXRecordDecl *decl = CXXRecordDecl::Create( context, TagTypeKind::TTK_Struct, context.getTranslationUnitDecl(), NoLoc, NoLoc, &id, nullptr, DelayTypeCreationTrue); + decl->addAttr(HLSLSubObjectAttr::CreateImplicit( + context, static_cast(Kind), static_cast(HGT))); decl->addAttr(FinalAttr::CreateImplicit(context, FinalAttr::Keyword_final)); decl->startDefinition(); return decl; @@ -2666,7 +2833,8 @@ void FinishSubobjectDecl(ASTContext &context, CXXRecordDecl *decl) { // uint32_t Flags; // }; static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "StateObjectConfig"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "StateObjectConfig", DXIL::SubobjectKind::StateObjectConfig); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2680,7 +2848,10 @@ static CXXRecordDecl *CreateSubobjectStateObjectConfig(ASTContext &context) { static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, bool global) { CXXRecordDecl *decl = StartSubobjectDecl( - context, global ? "GlobalRootSignature" : "LocalRootSignature"); + context, global ? "GlobalRootSignature" : "LocalRootSignature", + global ? 
DXIL::SubobjectKind::GlobalRootSignature + : DXIL::SubobjectKind::LocalRootSignature); + CreateSimpleField(context, decl, "Data", context.HLSLStringTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2695,7 +2866,8 @@ static CXXRecordDecl *CreateSubobjectRootSignature(ASTContext &context, static CXXRecordDecl * CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "SubobjectToExportsAssociation"); + StartSubobjectDecl(context, "SubobjectToExportsAssociation", + DXIL::SubobjectKind::SubobjectToExportsAssociation); CreateSimpleField(context, decl, "Subobject", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Exports", context.HLSLStringTy, @@ -2711,7 +2883,9 @@ CreateSubobjectSubobjectToExportsAssoc(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingShaderConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingShaderConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingShaderConfig", + DXIL::SubobjectKind::RaytracingShaderConfig); CreateSimpleField(context, decl, "MaxPayloadSizeInBytes", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "MaxAttributeSizeInBytes", @@ -2726,7 +2900,9 @@ CreateSubobjectRaytracingShaderConfig(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "RaytracingPipelineConfig"); + CXXRecordDecl *decl = + StartSubobjectDecl(context, "RaytracingPipelineConfig", + DXIL::SubobjectKind::RaytracingPipelineConfig); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); FinishSubobjectDecl(context, decl); @@ -2741,7 +2917,8 @@ CreateSubobjectRaytracingPipelineConfig(ASTContext &context) { static CXXRecordDecl * 
CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { CXXRecordDecl *decl = - StartSubobjectDecl(context, "RaytracingPipelineConfig1"); + StartSubobjectDecl(context, "RaytracingPipelineConfig1", + DXIL::SubobjectKind::RaytracingPipelineConfig1); CreateSimpleField(context, decl, "MaxTraceRecursionDepth", context.UnsignedIntTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "Flags", context.UnsignedIntTy, @@ -2756,7 +2933,9 @@ CreateSubobjectRaytracingPipelineConfig1(ASTContext &context) { // string ClosestHit; // }; static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { - CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl(context, "TriangleHitGroup", + DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::Triangle); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", context.HLSLStringTy, @@ -2773,8 +2952,9 @@ static CXXRecordDecl *CreateSubobjectTriangleHitGroup(ASTContext &context) { // }; static CXXRecordDecl * CreateSubobjectProceduralPrimitiveHitGroup(ASTContext &context) { - CXXRecordDecl *decl = - StartSubobjectDecl(context, "ProceduralPrimitiveHitGroup"); + CXXRecordDecl *decl = StartSubobjectDecl( + context, "ProceduralPrimitiveHitGroup", DXIL::SubobjectKind::HitGroup, + DXIL::HitGroupType::ProceduralPrimitive); CreateSimpleField(context, decl, "AnyHit", context.HLSLStringTy, AccessSpecifier::AS_private); CreateSimpleField(context, decl, "ClosestHit", context.HLSLStringTy, @@ -2822,6 +3002,7 @@ class HLSLExternalSource : public ExternalSemaSource { ClassTemplateDecl *m_vkIntegralConstantTemplateDecl; ClassTemplateDecl *m_vkLiteralTemplateDecl; + ClassTemplateDecl *m_vkBufferPointerTemplateDecl; // Declarations for Work Graph Output Record types ClassTemplateDecl *m_GroupNodeOutputRecordsTemplateDecl; @@ -2833,6 +3014,9 @@ class HLSLExternalSource : public 
ExternalSemaSource { // Namespace decl for Vulkan-specific intrinsic functions NamespaceDecl *m_vkNSDecl; + // Namespace decl for dx intrinsic functions + NamespaceDecl *m_dxNSDecl; + // Context being processed. ASTContext *m_context; @@ -2856,8 +3040,9 @@ class HLSLExternalSource : public ExternalSemaSource { TypedefDecl *m_matrixShorthandTypes[HLSLScalarTypeCount][4][4]; // Vector types already built. - QualType m_vectorTypes[HLSLScalarTypeCount][4]; - TypedefDecl *m_vectorTypedefs[HLSLScalarTypeCount][4]; + QualType m_vectorTypes[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; + TypedefDecl + *m_vectorTypedefs[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; // BuiltinType for each scalar type. QualType m_baseTypes[HLSLScalarTypeCount]; @@ -3049,10 +3234,13 @@ class HLSLExternalSource : public ExternalSemaSource { IdentifierInfo *ii = &m_context->Idents.get(StringRef(intrinsic->pArgs[0].pName)); DeclarationName declarationName = DeclarationName(ii); + + StorageClass SC = IsStaticMember(intrinsic) ? 
SC_Static : SC_None; + CXXMethodDecl *functionDecl = CreateObjectFunctionDeclarationWithParams( *m_context, recordDecl, functionResultQT, ArrayRef(argsQTs, numParams), - ArrayRef(argNames, numParams), declarationName, true, + ArrayRef(argNames, numParams), declarationName, true, SC, templateParamNamedDeclsCount > 0); functionDecl->setImplicit(true); @@ -3254,7 +3442,7 @@ class HLSLExternalSource : public ExternalSemaSource { *m_context, recordDecl, resultType, ArrayRef(indexType), ArrayRef(StringRef("index")), m_context->DeclarationNames.getCXXOperatorName(OO_Subscript), true, - true); + StorageClass::SC_None, true); hlsl::CreateFunctionTemplateDecl( *m_context, recordDecl, functionDecl, reinterpret_cast(&templateTypeParmDecl), 1); @@ -3298,9 +3486,8 @@ class HLSLExternalSource : public ExternalSemaSource { return -1; } -#ifdef ENABLE_SPIRV_CODEGEN - SmallVector CreateTemplateTypeParmDeclsForVkIntrinsicFunction( - const HLSL_INTRINSIC *intrinsic) { + SmallVector CreateTemplateTypeParmDeclsForIntrinsicFunction( + const HLSL_INTRINSIC *intrinsic, NamespaceDecl *nsDecl) { SmallVector templateTypeParmDecls; auto &context = m_sema->getASTContext(); const HLSL_INTRINSIC_ARGUMENT *pArgs = intrinsic->pArgs; @@ -3311,9 +3498,8 @@ class HLSLExternalSource : public ExternalSemaSource { pArgs[i].uLegalTemplates == LITEMPLATE_ANY) { IdentifierInfo *id = &context.Idents.get("T"); TemplateTypeParmDecl *templateTypeParmDecl = - TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, - 0, id, TypenameTrue, - ParameterPackFalse); + TemplateTypeParmDecl::Create(context, nsDecl, NoLoc, NoLoc, 0, 0, + id, TypenameTrue, ParameterPackFalse); if (TInfo == nullptr) { TInfo = m_sema->getASTContext().CreateTypeSourceInfo( m_context->UnsignedIntTy, 0); @@ -3322,12 +3508,31 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.push_back(templateTypeParmDecl); continue; } + if (pArgs[i].uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (TInfo == 
nullptr) { + TInfo = m_sema->getASTContext().CreateTypeSourceInfo( + m_context->UnsignedIntTy, 0); + } + IdentifierInfo *idT = &context.Idents.get("T"); + IdentifierInfo *idA = &context.Idents.get("A"); + TemplateTypeParmDecl *templateTypeParmDecl = + TemplateTypeParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, 0, + 0, idT, TypenameTrue, + ParameterPackFalse); + NonTypeTemplateParmDecl *nonTypeTemplateParmDecl = + NonTypeTemplateParmDecl::Create(context, m_vkNSDecl, NoLoc, NoLoc, + 0, 1, idA, context.UnsignedIntTy, + ParameterPackFalse, TInfo); + templateTypeParmDecl->setDefaultArgument(TInfo); + templateTypeParmDecls.push_back(templateTypeParmDecl); + templateTypeParmDecls.push_back(nonTypeTemplateParmDecl); + } } return templateTypeParmDecls; } SmallVector - CreateParmDeclsForVkIntrinsicFunction( + CreateParmDeclsForIntrinsicFunction( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl ¶mTypes, const SmallVectorImpl ¶mMods) { @@ -3352,7 +3557,7 @@ class HLSLExternalSource : public ExternalSemaSource { return paramDecls; } - SmallVector VkIntrinsicFunctionParamTypes( + SmallVector getIntrinsicFunctionParamTypes( const HLSL_INTRINSIC *intrinsic, const SmallVectorImpl &templateTypeParmDecls) { auto &context = m_sema->getASTContext(); @@ -3387,8 +3592,26 @@ class HLSLExternalSource : public ExternalSemaSource { case LICOMPTYPE_VOID: paramTypes.push_back(context.VoidTy); break; + case LICOMPTYPE_HIT_OBJECT: + paramTypes.push_back(GetBasicKindType(AR_OBJECT_HIT_OBJECT)); + break; +#ifdef ENABLE_SPIRV_CODEGEN + case LICOMPTYPE_VK_BUFFER_POINTER: { + const ArBasicKind *match = + std::find(g_ArBasicKindsAsTypes, + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + AR_OBJECT_VK_BUFFER_POINTER); + DXASSERT(match != + &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], + "otherwise can't find constant in basic kinds"); + size_t index = match - g_ArBasicKindsAsTypes; + paramTypes.push_back( + 
m_sema->getASTContext().getTypeDeclType(m_objectTypeDecls[index])); + break; + } +#endif default: - DXASSERT(false, "Argument type of vk:: intrinsic function is not " + DXASSERT(false, "Argument type of intrinsic function is not " "supported"); break; } @@ -3396,9 +3619,9 @@ class HLSLExternalSource : public ExternalSemaSource { return paramTypes; } - QualType - VkIntrinsicFunctionType(const SmallVectorImpl ¶mTypes, - const SmallVectorImpl ¶mMods) { + QualType getIntrinsicFunctionType( + const SmallVectorImpl ¶mTypes, + const SmallVectorImpl ¶mMods) { DXASSERT(!paramTypes.empty(), "Given param type vector is empty"); ArrayRef params({}); @@ -3411,7 +3634,7 @@ class HLSLExternalSource : public ExternalSemaSource { EmptyEPI, paramMods); } - void SetParmDeclsForVkIntrinsicFunction( + void SetParmDeclsForIntrinsicFunction( TypeSourceInfo *TInfo, FunctionDecl *functionDecl, const SmallVectorImpl ¶mDecls) { FunctionProtoTypeLoc Proto = @@ -3426,47 +3649,39 @@ class HLSLExternalSource : public ExternalSemaSource { functionDecl->setParams(paramDecls); } - // Adds intrinsic function declarations to the "vk" namespace. - // It does so only if SPIR-V code generation is being done. - // Assumes the implicit "vk" namespace has already been created. - void AddVkIntrinsicFunctions() { - // If not doing SPIR-V CodeGen, return. - if (!m_sema->getLangOpts().SPIRV) - return; - - DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); - + void AddIntrinsicFunctionsToNamespace(const HLSL_INTRINSIC *table, + uint32_t tableSize, + NamespaceDecl *nsDecl) { auto &context = m_sema->getASTContext(); - for (uint32_t i = 0; i < _countof(g_VkIntrinsics); ++i) { - const HLSL_INTRINSIC *intrinsic = &g_VkIntrinsics[i]; + for (uint32_t i = 0; i < tableSize; ++i) { + const HLSL_INTRINSIC *intrinsic = &table[i]; const IdentifierInfo &fnII = context.Idents.get( intrinsic->pArgs->pName, tok::TokenKind::identifier); DeclarationName functionName(&fnII); // Create TemplateTypeParmDecl. 
SmallVector templateTypeParmDecls = - CreateTemplateTypeParmDeclsForVkIntrinsicFunction(intrinsic); + CreateTemplateTypeParmDeclsForIntrinsicFunction(intrinsic, nsDecl); // Get types for parameters. SmallVector paramTypes = - VkIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); + getIntrinsicFunctionParamTypes(intrinsic, templateTypeParmDecls); SmallVector paramMods; InitParamMods(intrinsic, paramMods); // Create FunctionDecl. - QualType fnType = VkIntrinsicFunctionType(paramTypes, paramMods); + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; + QualType fnType = getIntrinsicFunctionType(paramTypes, paramMods); TypeSourceInfo *TInfo = m_sema->getASTContext().CreateTypeSourceInfo(fnType, 0); FunctionDecl *functionDecl = FunctionDecl::Create( - context, m_vkNSDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), - fnType, TInfo, StorageClass::SC_Extern, InlineSpecifiedFalse, - HasWrittenPrototypeTrue); + context, nsDecl, NoLoc, DeclarationNameInfo(functionName, NoLoc), + fnType, TInfo, SC, InlineSpecifiedFalse, HasWrittenPrototypeTrue); // Create and set ParmVarDecl. 
SmallVector paramDecls = - CreateParmDeclsForVkIntrinsicFunction(intrinsic, paramTypes, - paramMods); - SetParmDeclsForVkIntrinsicFunction(TInfo, functionDecl, paramDecls); + CreateParmDeclsForIntrinsicFunction(intrinsic, paramTypes, paramMods); + SetParmDeclsForIntrinsicFunction(TInfo, functionDecl, paramDecls); if (!templateTypeParmDecls.empty()) { TemplateParameterList *templateParmList = TemplateParameterList::Create( @@ -3474,22 +3689,52 @@ class HLSLExternalSource : public ExternalSemaSource { templateTypeParmDecls.size(), NoLoc); functionDecl->setTemplateParameterListsInfo(context, 1, &templateParmList); - FunctionTemplateDecl *functionTemplate = FunctionTemplateDecl::Create( - context, m_vkNSDecl, NoLoc, functionName, templateParmList, - functionDecl); + FunctionTemplateDecl *functionTemplate = + FunctionTemplateDecl::Create(context, nsDecl, NoLoc, functionName, + templateParmList, functionDecl); functionDecl->setDescribedFunctionTemplate(functionTemplate); - m_vkNSDecl->addDecl(functionTemplate); - functionTemplate->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionTemplate); + functionTemplate->setDeclContext(nsDecl); } else { - m_vkNSDecl->addDecl(functionDecl); - functionDecl->setLexicalDeclContext(m_vkNSDecl); - functionDecl->setDeclContext(m_vkNSDecl); + nsDecl->addDecl(functionDecl); + functionDecl->setLexicalDeclContext(nsDecl); + functionDecl->setDeclContext(nsDecl); } functionDecl->setImplicit(true); } } + // Adds intrinsic function declarations to the "dx" namespace. + // Assumes the implicit "vk" namespace has already been created. + void AddDxIntrinsicFunctions() { + DXASSERT(m_dxNSDecl, "caller has not created the dx namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_DxIntrinsics, _countof(g_DxIntrinsics), + m_dxNSDecl); + // Eagerly declare HitObject methods. This is required to make lookup of + // 'static' HLSL member functions work without special-casing HLSL scope + // lookup. 
+ CXXRecordDecl *HitObjectDecl = + GetBasicKindType(AR_OBJECT_HIT_OBJECT)->getAsCXXRecordDecl(); + CompleteType(HitObjectDecl); + } + +#ifdef ENABLE_SPIRV_CODEGEN + // Adds intrinsic function declarations to the "vk" namespace. + // It does so only if SPIR-V code generation is being done. + // Assumes the implicit "vk" namespace has already been created. + void AddVkIntrinsicFunctions() { + // If not doing SPIR-V CodeGen, return. + if (!m_sema->getLangOpts().SPIRV) + return; + + DXASSERT(m_vkNSDecl, "caller has not created the vk namespace yet"); + + AddIntrinsicFunctionsToNamespace(g_VkIntrinsics, _countof(g_VkIntrinsics), + m_vkNSDecl); + } + // Adds implicitly defined Vulkan-specific constants to the "vk" namespace. // It does so only if SPIR-V code generation is being done. // Assumes the implicit "vk" namespace has already been created. @@ -3540,6 +3785,20 @@ class HLSLExternalSource : public ExternalSemaSource { if (kind == AR_OBJECT_LEGACY_EFFECT) effectKindIndex = i; + InheritableAttr *Attr = nullptr; + if (IS_BASIC_STREAM(kind)) + Attr = HLSLStreamOutputAttr::CreateImplicit( + *m_context, kind - AR_OBJECT_POINTSTREAM + 1); + else if (IS_BASIC_PATCH(kind)) + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, + kind == AR_OBJECT_INPUTPATCH); + else { + DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; + DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; + if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) + Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, + (unsigned)ResClass); + } DXASSERT(kind < _countof(g_ArBasicTypeNames), "g_ArBasicTypeNames has the wrong number of entries"); assert(kind < _countof(g_ArBasicTypeNames)); @@ -3586,11 +3845,15 @@ class HLSLExternalSource : public ExternalSemaSource { break; } } else if (kind == AR_OBJECT_CONSTANT_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ false); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else 
if (kind == AR_OBJECT_TEXTURE_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ true); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_RAY_QUERY) { recordDecl = DeclareRayQueryType(*m_context); + } else if (kind == AR_OBJECT_HIT_OBJECT) { + // Declare 'HitObject' in '::dx' extension namespace. + DXASSERT(m_dxNSDecl, "namespace ::dx must be declared in SM6.9+"); + recordDecl = DeclareHitObjectType(*m_dxNSDecl); } else if (kind == AR_OBJECT_HEAP_RESOURCE) { recordDecl = DeclareResourceType(*m_context, /*bSampler*/ false); if (SM->IsSM66Plus()) { @@ -3609,10 +3872,10 @@ class HLSLExternalSource : public ExternalSemaSource { } } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2D", "kind"); + *m_context, "FeedbackTexture2D", "kind", Attr); } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D_ARRAY) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2DArray", "kind"); + *m_context, "FeedbackTexture2DArray", "kind", Attr); } else if (kind == AR_OBJECT_EMPTY_NODE_INPUT) { recordDecl = DeclareNodeOrRecordType( *m_context, DXIL::NodeIOKind::EmptyInput, @@ -3725,23 +3988,21 @@ class HLSLExternalSource : public ExternalSemaSource { recordDecl = DeclareTemplateTypeWithHandleInDeclContext( *m_context, m_vkNSDecl, typeName, 1, nullptr); recordDecl->setImplicit(true); + } else if (kind == AR_OBJECT_VK_BUFFER_POINTER) { + if (!m_vkNSDecl) + continue; + recordDecl = DeclareVkBufferPointerType(*m_context, m_vkNSDecl); + recordDecl->setImplicit(true); + m_vkBufferPointerTemplateDecl = recordDecl->getDescribedClassTemplate(); } #endif else if (templateArgCount == 0) { - recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, - /*isCompleteType*/ false); + recordDecl = + DeclareRecordTypeWithHandle(*m_context, typeName, + /*isCompleteType*/ false, Attr); } else { DXASSERT(templateArgCount == 1 || 
templateArgCount == 2, "otherwise a new case has been added"); - - InheritableAttr *Attr = nullptr; - DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; - DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; - if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) { - Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, - (unsigned)ResClass); - } - TypeSourceInfo *typeDefault = TemplateHasDefaultType(kind) ? float4TypeSourceInfo : nullptr; recordDecl = DeclareTemplateTypeWithHandle( @@ -3830,7 +4091,7 @@ class HLSLExternalSource : public ExternalSemaSource { clang::TypedefDecl *LookupVectorShorthandType(HLSLScalarType scalarType, UINT colCount) { DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown && - colCount <= 4); + colCount <= DXIL::kDefaultMaxVectorLength); TypedefDecl *qts = m_vectorTypedefs[scalarType][colCount - 1]; if (qts == nullptr) { QualType type = LookupVectorType(scalarType, colCount); @@ -3845,9 +4106,10 @@ class HLSLExternalSource : public ExternalSemaSource { HLSLExternalSource() : m_matrixTemplateDecl(nullptr), m_vectorTemplateDecl(nullptr), m_vkIntegralConstantTemplateDecl(nullptr), - m_vkLiteralTemplateDecl(nullptr), m_hlslNSDecl(nullptr), - m_vkNSDecl(nullptr), m_context(nullptr), m_sema(nullptr), - m_hlslStringTypedef(nullptr) { + m_vkLiteralTemplateDecl(nullptr), + m_vkBufferPointerTemplateDecl(nullptr), m_hlslNSDecl(nullptr), + m_vkNSDecl(nullptr), m_dxNSDecl(nullptr), m_context(nullptr), + m_sema(nullptr), m_hlslStringTypedef(nullptr) { memset(m_matrixTypes, 0, sizeof(m_matrixTypes)); memset(m_matrixShorthandTypes, 0, sizeof(m_matrixShorthandTypes)); memset(m_vectorTypes, 0, sizeof(m_vectorTypes)); @@ -3876,6 +4138,14 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema = &S; S.addExternalSource(this); + m_dxNSDecl = + NamespaceDecl::Create(context, context.getTranslationUnitDecl(), + /*Inline*/ false, SourceLocation(), + SourceLocation(), &context.Idents.get("dx"), + 
/*PrevDecl*/ nullptr); + m_dxNSDecl->setImplicit(); + context.getTranslationUnitDecl()->addDecl(m_dxNSDecl); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Create the "vk" namespace which contains Vulkan-specific intrinsics. @@ -3894,6 +4164,8 @@ class HLSLExternalSource : public ExternalSemaSource { AddIntrinsicTableMethods(intrinsic); } + AddDxIntrinsicFunctions(); + #ifdef ENABLE_SPIRV_CODEGEN if (m_sema->getLangOpts().SPIRV) { // Add Vulkan-specific intrinsics. @@ -3937,7 +4209,9 @@ class HLSLExternalSource : public ExternalSemaSource { } QualType LookupVectorType(HLSLScalarType scalarType, unsigned int colCount) { - QualType qt = m_vectorTypes[scalarType][colCount - 1]; + QualType qt; + if (colCount < DXIL::kDefaultMaxVectorLength) + qt = m_vectorTypes[scalarType][colCount - 1]; if (qt.isNull()) { if (m_scalarTypes[scalarType].isNull()) { LookupScalarTypeDef(scalarType); @@ -3945,7 +4219,8 @@ class HLSLExternalSource : public ExternalSemaSource { qt = GetOrCreateVectorSpecialization(*m_context, m_sema, m_vectorTemplateDecl, m_scalarTypes[scalarType], colCount); - m_vectorTypes[scalarType][colCount - 1] = qt; + if (colCount < DXIL::kDefaultMaxVectorLength) + m_vectorTypes[scalarType][colCount - 1] = qt; } return qt; } @@ -3969,13 +4244,6 @@ class HLSLExternalSource : public ExternalSemaSource { return IsSubobjectBasicKind(GetTypeElementKind(type)); } - bool IsRayQueryBasicKind(ArBasicKind kind) { - return kind == AR_OBJECT_RAY_QUERY; - } - bool IsRayQueryType(QualType type) { - return IsRayQueryBasicKind(GetTypeElementKind(type)); - } - void WarnMinPrecision(QualType Type, SourceLocation Loc) { Type = Type->getCanonicalTypeUnqualified(); if (IsVectorType(m_sema, Type) || IsMatrixType(m_sema, Type)) { @@ -4580,6 +4848,7 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_WAVE: case AR_OBJECT_ACCELERATION_STRUCT: case AR_OBJECT_RAY_DESC: + case AR_OBJECT_HIT_OBJECT: case AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES: case 
AR_OBJECT_RWTEXTURE2DMS: case AR_OBJECT_RWTEXTURE2DMS_ARRAY: @@ -4596,7 +4865,11 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_NODE_OUTPUT_ARRAY: case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: - case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: { + case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: +#ifdef ENABLE_SPIRV_CODEGEN + case AR_OBJECT_VK_BUFFER_POINTER: +#endif + { const ArBasicKind *match = std::find( g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); @@ -4722,16 +4995,14 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::RawBuffer; ResClass = DXIL::ResourceClass::UAV; return true; - case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: - case AR_OBJECT_APPEND_STRUCTURED_BUFFER: - // It may seem incorrect to make these SRV, - // but it is consistent with GetHLSLResourceProperties(). case AR_OBJECT_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_RWSTRUCTURED_BUFFER: case AR_OBJECT_ROVSTRUCTURED_BUFFER: + case AR_OBJECT_CONSUME_STRUCTURED_BUFFER: + case AR_OBJECT_APPEND_STRUCTURED_BUFFER: ResKind = DXIL::ResourceKind::StructuredBuffer; ResClass = DXIL::ResourceClass::UAV; return true; @@ -4741,7 +5012,7 @@ class HLSLExternalSource : public ExternalSemaSource { return true; case AR_OBJECT_TEXTURE_BUFFER: ResKind = DXIL::ResourceKind::TBuffer; - ResClass = DXIL::ResourceClass::CBuffer; + ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_FEEDBACKTEXTURE2D: ResKind = DXIL::ResourceKind::FeedbackTexture2D; @@ -4751,6 +5022,15 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::FeedbackTexture2DArray; ResClass = DXIL::ResourceClass::SRV; return true; + case AR_OBJECT_SAMPLER: + case AR_OBJECT_SAMPLERCOMPARISON: + ResKind = DXIL::ResourceKind::Sampler; + ResClass = DXIL::ResourceClass::Sampler; + return true; + case 
AR_OBJECT_ACCELERATION_STRUCT: + ResKind = DXIL::ResourceKind::RTAccelerationStructure; + ResClass = DXIL::ResourceClass::SRV; + return true; default: return false; } @@ -4896,12 +5176,18 @@ class HLSLExternalSource : public ExternalSemaSource { ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; + const bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + // Intrinsics live in the global namespace, so references to their names // should be either unqualified or '::'-prefixed. - // Exception: Vulkan-specific intrinsics live in the 'vk::' namespace. - if (isQualified && !isGlobalNamespace && !isVkNamespace) { + // Exceptions: + // - Vulkan-specific intrinsics live in the 'vk::' namespace. + // - DirectX-specific intrinsics live in the 'dx::' namespace. + if (isQualified && !isGlobalNamespace && !isVkNamespace && !isDxNamespace) return false; - } const DeclarationNameInfo declName = ULE->getNameInfo(); IdentifierInfo *idInfo = declName.getName().getAsIdentifierInfo(); @@ -4912,6 +5198,10 @@ class HLSLExternalSource : public ExternalSemaSource { StringRef nameIdentifier = idInfo->getName(); const HLSL_INTRINSIC *table = g_Intrinsics; auto tableCount = _countof(g_Intrinsics); + if (isDxNamespace) { + table = g_DxIntrinsics; + tableCount = _countof(g_DxIntrinsics); + } #ifdef ENABLE_SPIRV_CODEGEN if (isVkNamespace) { table = g_VkIntrinsics; @@ -4948,11 +5238,16 @@ class HLSLExternalSource : public ExternalSemaSource { m_usedIntrinsics.insert(UsedIntrinsic(pIntrinsic, functionArgTypes)); bool insertedNewValue = insertResult.second; if (insertedNewValue) { + NamespaceDecl *nsDecl = m_hlslNSDecl; + if (isVkNamespace) + nsDecl = m_vkNSDecl; + else if (isDxNamespace) + nsDecl = m_dxNSDecl; DXASSERT(tableName, "otherwise IDxcIntrinsicTable::GetTableName() failed"); - 
intrinsicFuncDecl = AddHLSLIntrinsicFunction( - *m_context, isVkNamespace ? m_vkNSDecl : m_hlslNSDecl, tableName, - lowering, pIntrinsic, &functionArgTypes); + intrinsicFuncDecl = + AddHLSLIntrinsicFunction(*m_context, nsDecl, tableName, lowering, + pIntrinsic, &functionArgTypes); insertResult.first->setFunctionDecl(intrinsicFuncDecl); } else { intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); @@ -5033,10 +5328,6 @@ class HLSLExternalSource : public ExternalSemaSource { AR_BASIC_UNKNOWN; } - /// Checks whether the specified value is a valid vector - /// size. - bool IsValidVectorSize(size_t length) { return 1 <= length && length <= 4; } - /// Checks whether the specified value is a valid matrix row or /// column size. bool IsValidMatrixColOrRowSize(size_t length) { @@ -5072,11 +5363,6 @@ class HLSLExternalSource : public ExternalSemaSource { false); } else if (objectKind == AR_TOBJ_VECTOR) { bool valid = true; - if (!IsValidVectorSize(GetHLSLVecSize(type))) { - valid = false; - m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectorsize) - << type << GetHLSLVecSize(type); - } if (!IsScalarType(GetMatrixOrVectorElementType(type))) { valid = false; m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectortype) @@ -5099,6 +5385,10 @@ class HLSLExternalSource : public ExternalSemaSource { << type << GetMatrixOrVectorElementType(type); } return valid; +#ifdef ENABLE_SPIRV_CODEGEN + } else if (hlsl::IsVKBufferPointerType(qt)) { + return true; +#endif } else if (qt->isStructureOrClassType()) { const RecordType *recordType = qt->getAs(); objectKind = ClassifyRecordType(recordType); @@ -5194,9 +5484,13 @@ class HLSLExternalSource : public ExternalSemaSource { SourceLocation Loc); bool CheckRangedTemplateArgument(SourceLocation diagLoc, - llvm::APSInt &sintValue) { - if (!sintValue.isStrictlyPositive() || sintValue.getLimitedValue() > 4) { - m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_4); + llvm::APSInt &sintValue, bool IsVector) { + unsigned MaxLength = 
DXIL::kDefaultMaxVectorLength; + if (IsVector) + MaxLength = m_sema->getLangOpts().MaxHLSLVectorLength; + if (!sintValue.isStrictlyPositive() || + sintValue.getLimitedValue() > MaxLength) { + m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_to_max) << MaxLength; return true; } @@ -5219,11 +5513,14 @@ class HLSLExternalSource : public ExternalSemaSource { return false; } // Allow object type for Constant/TextureBuffer. - if (templateName == "ConstantBuffer" || templateName == "TextureBuffer") { + HLSLResourceAttr *ResAttr = + Template->getTemplatedDecl()->getAttr(); + if (ResAttr && DXIL::IsCTBuffer(ResAttr->getResKind())) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); - DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "cbuffer with non-type template arg"); QualType argType = arg.getAsType(); SourceLocation argSrcLoc = argLoc.getLocation(); if (IsScalarType(argType) || IsVectorType(m_sema, argType) || @@ -5233,23 +5530,14 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (auto *TST = dyn_cast(argType)) { - // This is a bit of a special case we need to handle. Because the - // buffer types don't use their template parameter in a way that would - // force instantiation, we need to force specialization here. 
- GetOrCreateTemplateSpecialization( - *m_context, *m_sema, - cast( - TST->getTemplateName().getAsTemplateDecl()), - llvm::ArrayRef(TST->getArgs(), - TST->getNumArgs())); - } - if (const RecordType *recordType = argType->getAs()) { - if (!recordType->getDecl()->isCompleteDefinition()) { - m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) - << argType; - return true; - } + m_sema->RequireCompleteType(argSrcLoc, argType, + diag::err_typecheck_decl_incomplete_type); + + if (ContainsLongVector(argType)) { + const unsigned ConstantBuffersOrTextureBuffersIdx = 0; + m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) + << ConstantBuffersOrTextureBuffersIdx; + return true; } } return false; @@ -5279,22 +5567,13 @@ class HLSLExternalSource : public ExternalSemaSource { // template instantiation. if (ArgTy->isDependentType()) return false; - if (auto *recordType = ArgTy->getAs()) { - if (CXXRecordDecl *cxxRecordDecl = - dyn_cast(recordType->getDecl())) { - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast(cxxRecordDecl)) { - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) { - // Make sure specialization is done before IsTypeNumeric. - // If not, ArgTy might be treat as empty struct. - m_sema->RequireCompleteType( - ArgLoc.getLocation(), ArgTy, - diag::err_typecheck_decl_incomplete_type); - } - } - } - } + // Make sure specialization is done before IsTypeNumeric. + // If not, ArgTy might be treat as empty struct. + m_sema->RequireCompleteType(ArgLoc.getLocation(), ArgTy, + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = ArgTy->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; // The node record type must be compound - error if it is not. 
if (GetTypeObjectKind(ArgTy) != AR_TOBJ_COMPOUND) { m_sema->Diag(ArgLoc.getLocation(), diag::err_hlsl_node_record_type) @@ -5316,6 +5595,78 @@ class HLSLExternalSource : public ExternalSemaSource { return true; } return false; + } else if (Template->getTemplatedDecl() + ->hasAttr()) { + int numArgs = TemplateArgList.size(); + DXASSERT(numArgs == 1 || numArgs == 2, + "otherwise the template has not been declared properly"); + + // first, determine if the rayquery flag AllowOpacityMicromaps is set + bool HasRayQueryFlagAllowOpacityMicromaps = false; + if (numArgs > 1) { + const TemplateArgument &Arg2 = TemplateArgList[1].getArgument(); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, m_sema->getASTContext()); + if (Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps) + HasRayQueryFlagAllowOpacityMicromaps = true; + } + + // next, get the first template argument, to check if + // the ForceOMM2State flag is set + const TemplateArgument &Arg1 = TemplateArgList[0].getArgument(); + Expr *Expr1 = Arg1.getAsExpr(); + llvm::APSInt Arg1val; + bool HasRayFlagForceOMM2State = + Expr1->isIntegerConstantExpr(Arg1val, m_sema->getASTContext()) && + (Arg1val.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; + + // finally, if ForceOMM2State is set and AllowOpacityMicromaps + // isn't, emit a warning + if (HasRayFlagForceOMM2State && !HasRayQueryFlagAllowOpacityMicromaps) + m_sema->Diag(TemplateArgList[0].getLocation(), + diag::warn_hlsl_rayquery_flags_conflict); + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Tessellation patch should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Tessellation patch requires type template arg 0"); + + 
m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; + if (ContainsLongVector(arg.getAsType())) { + const unsigned TessellationPatchesIDx = 1; + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << TessellationPatchesIDx; + return true; + } + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Geometry streams should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Geometry stream requires type template arg 0"); + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; + if (ContainsLongVector(arg.getAsType())) { + const unsigned GeometryStreamsIdx = 2; + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << GeometryStreamsIdx; + return true; + } } bool isMatrix = Template->getCanonicalDecl() == @@ -5337,9 +5688,7 @@ class HLSLExternalSource : public ExternalSemaSource { // NOTE: IsValidTemplateArgumentType emits its own diagnostics return true; } - HLSLResourceAttr *ResAttr = - Template->getTemplatedDecl()->getAttr(); - if (ResAttr && IsTyped((DXIL::ResourceKind)ResAttr->getResKind())) { + if (ResAttr && IsTyped(ResAttr->getResKind())) { // Check vectors for being too large. 
if (IsVectorType(m_sema, argType)) { unsigned NumElt = hlsl::GetElementCount(argType); @@ -5368,17 +5717,16 @@ class HLSLExternalSource : public ExternalSemaSource { llvm::APSInt constantResult; if (expr != nullptr && expr->isIntegerConstantExpr(constantResult, *m_context)) { - if (CheckRangedTemplateArgument(argSrcLoc, constantResult)) { + if (CheckRangedTemplateArgument(argSrcLoc, constantResult, + isVector)) return true; - } } } } else if (arg.getKind() == TemplateArgument::ArgKind::Integral) { if (isMatrix || isVector) { llvm::APSInt Val = arg.getAsIntegral(); - if (CheckRangedTemplateArgument(argSrcLoc, Val)) { + if (CheckRangedTemplateArgument(argSrcLoc, Val, isVector)) return true; - } } } } @@ -5670,11 +6018,12 @@ class HLSLExternalSource : public ExternalSemaSource { Params.push_back(paramDecl); } + StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; QualType T = TInfo->getType(); DeclarationNameInfo NameInfo(FunctionTemplate->getDeclName(), NoLoc); CXXMethodDecl *method = CXXMethodDecl::Create( *m_context, dyn_cast(owner), NoLoc, NameInfo, T, TInfo, - SC_Extern, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); + SC, InlineSpecifiedFalse, IsConstexprFalse, NoLoc); // Add intrinsic attr AddHLSLIntrinsicAttr(method, *m_context, tableName, lowering, intrinsic); @@ -6261,8 +6610,10 @@ bool HLSLExternalSource::MatchArguments( argTypes.clear(); const bool isVariadic = IsVariadicIntrinsicFunction(pIntrinsic); - static const UINT UnusedSize = 0xFF; - static const BYTE MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + static const uint32_t UnusedSize = std::numeric_limits::max(); + static const uint32_t MaxIntrinsicArgs = g_MaxIntrinsicParamCount + 1; + assert(MaxIntrinsicArgs < std::numeric_limits::max() && + "This should be a pretty small number"); #define CAB(cond, arg) \ { \ if (!(cond)) { \ @@ -6277,7 +6628,7 @@ bool HLSLExternalSource::MatchArguments( ArBasicKind ComponentType[MaxIntrinsicArgs]; // Component type for each argument, // 
AR_BASIC_UNKNOWN if unspecified. - UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32 + UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize // if unspecified. badArgIdx = MaxIntrinsicArgs; @@ -6512,6 +6863,7 @@ bool HLSLExternalSource::MatchArguments( if (pIntrinsic->pArgs[0].qwUsage && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION_2 && pIntrinsic->pArgs[0].uComponentTypeId != INTRIN_COMPTYPE_FROM_NODEOUTPUT) { CAB(pIntrinsic->pArgs[0].uTemplateId < MaxIntrinsicArgs, 0); @@ -6552,7 +6904,8 @@ bool HLSLExternalSource::MatchArguments( // Check template. if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE || - pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION) { + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION || + pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { continue; // Already verified that this is available. } if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { @@ -6631,6 +6984,9 @@ bool HLSLExternalSource::MatchArguments( } } + std::string profile = m_sema->getLangOpts().HLSLProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName(profile.c_str()); + // Populate argTypes. 
for (size_t i = 0; i <= Args.size(); i++) { const HLSL_INTRINSIC_ARGUMENT *pArgument = &pIntrinsic->pArgs[i]; @@ -6718,6 +7074,14 @@ bool HLSLExternalSource::MatchArguments( } else { pNewType = functionTemplateTypeArg; } + } else if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_FUNCTION_2) { + if (i == 0 && + (builtinOp == hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast || + builtinOp == hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast)) { + pNewType = Args[0]->getType(); + } else { + badArgIdx = std::min(badArgIdx, i); + } } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) { if (objectElement.isNull()) { @@ -6801,8 +7165,9 @@ bool HLSLExternalSource::MatchArguments( } // Verify that the final results are in bounds. - CAB(uCols > 0 && uCols <= MaxVectorSize && uRows > 0 && - uRows <= MaxVectorSize, + CAB((uCols > 0 && uRows > 0 && + ((uCols <= MaxVectorSize && uRows <= MaxVectorSize) || + (SM->IsSM69Plus() && uRows == 1))), i); // Const @@ -7935,7 +8300,8 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( DXASSERT_NOMSG(initSequence != nullptr); // In HLSL there are no default initializers, eg float4x4 m(); - // Except for RayQuery constructor (also handle InitializationKind::IK_Value) + // Except for RayQuery and HitObject constructors (also handle + // InitializationKind::IK_Value) if (Kind.getKind() == InitializationKind::IK_Default || Kind.getKind() == InitializationKind::IK_Value) { QualType destBaseType = m_context->getBaseElementType(Entity.getType()); @@ -7946,7 +8312,9 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL( GetRecordDeclForBuiltInOrStruct(typeRecordDecl)); DXASSERT(index != -1, "otherwise can't find type we already determined was an object"); - if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY) { + + if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY || + g_ArBasicKindsAsTypes[index] == AR_OBJECT_HIT_OBJECT) { CXXConstructorDecl *Constructor = *typeRecordDecl->ctor_begin(); 
initSequence->AddConstructorInitializationStep( Constructor, AccessSpecifier::AS_public, destBaseType, false, false, @@ -8571,6 +8939,9 @@ ExprResult HLSLExternalSource::LookupVectorMemberExprForHLSL( llvm_unreachable("Unknown VectorMemberAccessError value"); } + if (colCount > 4) + msg = diag::err_hlsl_vector_member_on_long_vector; + if (msg != 0) { m_sema->Diag(MemberLoc, msg) << memberText; @@ -9397,6 +9768,13 @@ bool HLSLExternalSource::CanConvert(SourceLocation loc, Expr *sourceExpr, return false; } +#ifdef ENABLE_SPIRV_CODEGEN + // Cast vk::BufferPointer to pointer address. + if (SourceInfo.EltKind == AR_OBJECT_VK_BUFFER_POINTER) { + return TargetInfo.EltKind == AR_BASIC_UINT64; + } +#endif + // Cast cbuffer to its result value. if ((SourceInfo.EltKind == AR_OBJECT_CONSTANT_BUFFER || SourceInfo.EltKind == AR_OBJECT_TEXTURE_BUFFER) && @@ -11202,7 +11580,8 @@ static bool CheckFinishedCrossGroupSharingCall(Sema &S, CXXMethodDecl *MD, return false; } -static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { +static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { DXASSERT(FD->getNumParams() == 2, "otherwise, unknown Barrier overload"); // Emit error when MemoryTypeFlags are known to be invalid. 
@@ -11232,12 +11611,18 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { llvm::APSInt SemanticFlagsVal; if (SemanticFlagsExpr->isIntegerConstantExpr(SemanticFlagsVal, S.Context)) { SemanticFlags = SemanticFlagsVal.getLimitedValue(); - if ((uint32_t)SemanticFlags & - ~(uint32_t)DXIL::BarrierSemanticFlag::ValidMask) { + uint32_t ValidMask = 0U; + if (SM->IsSM69Plus()) { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); + } else { + ValidMask = + static_cast(hlsl::DXIL::BarrierSemanticFlag::LegacyFlags); + } + if ((uint32_t)SemanticFlags & ~ValidMask) { S.Diags.Report(SemanticFlagsExpr->getExprLoc(), diag::err_hlsl_barrier_invalid_semantic_flags) - << (uint32_t)SemanticFlags - << (uint32_t)DXIL::BarrierSemanticFlag::ValidMask; + << SM->IsSM69Plus(); return true; } } @@ -11245,6 +11630,32 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE) { return false; } +#ifdef ENABLE_SPIRV_CODEGEN +static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, + bool isStatic) { + const Expr *argExpr = CE->getArg(0); + QualType srcType = argExpr->getType(); + QualType destType = CE->getType(); + QualType srcTypeArg = hlsl::GetVKBufferPointerBufferType(srcType); + QualType destTypeArg = hlsl::GetVKBufferPointerBufferType(destType); + + if (isStatic && srcTypeArg != destTypeArg && + !S.IsDerivedFrom(srcTypeArg, destTypeArg)) { + S.Diags.Report(CE->getExprLoc(), + diag::err_hlsl_vk_static_pointer_cast_type); + return true; + } + + if (hlsl::GetVKBufferPointerAlignment(destType) > + hlsl::GetVKBufferPointerAlignment(srcType)) { + S.Diags.Report(CE->getExprLoc(), diag::err_hlsl_vk_pointer_cast_alignment); + return true; + } + + return false; +} +#endif + // Check HLSL call constraints, not fatal to creating the AST. 
void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto) { @@ -11254,6 +11665,9 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, if (!IsBuiltinTable(IntrinsicAttr->getGroup())) return; + const auto *SM = + hlsl::ShaderModel::GetByName(getLangOpts().HLSLProfile.c_str()); + hlsl::IntrinsicOp opCode = (hlsl::IntrinsicOp)IntrinsicAttr->getOpcode(); switch (opCode) { case hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing: @@ -11261,8 +11675,16 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, TheCall->getLocStart()); break; case hlsl::IntrinsicOp::IOP_Barrier: - CheckBarrierCall(*this, FDecl, TheCall); + CheckBarrierCall(*this, FDecl, TheCall, SM); break; +#ifdef ENABLE_SPIRV_CODEGEN + case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, false); + break; + case hlsl::IntrinsicOp::IOP_Vkstatic_pointer_cast: + CheckVKBufferPointerCast(*this, FDecl, TheCall, true); + break; +#endif default: break; } @@ -11522,6 +11944,52 @@ static void DiagnoseReachableBarrier(Sema &S, CallExpr *CE, } } +bool IsRayFlagForceOMM2StateSet(Sema &sema, const CallExpr *CE) { + const Expr *Expr1 = CE->getArg(1); + llvm::APSInt constantResult; + return Expr1->isIntegerConstantExpr(constantResult, sema.getASTContext()) && + (constantResult.getLimitedValue() & + (uint64_t)DXIL::RayFlag::ForceOMM2State) != 0; +} + +void DiagnoseTraceRayInline(Sema &sema, CallExpr *callExpr) { + // Validate if the RayFlag parameter has RAY_FLAG_FORCE_OMM_2_STATE set, + // the RayQuery decl must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set, + // otherwise emit a diagnostic. 
+ if (IsRayFlagForceOMM2StateSet(sema, callExpr)) { + CXXMemberCallExpr *CXXCallExpr = dyn_cast(callExpr); + if (!CXXCallExpr) { + return; + } + const DeclRefExpr *DRE = + dyn_cast(CXXCallExpr->getImplicitObjectArgument()); + assert(DRE); + QualType QT = DRE->getType(); + auto *typeRecordDecl = QT->getAsCXXRecordDecl(); + ClassTemplateSpecializationDecl *SpecDecl = + llvm::dyn_cast(typeRecordDecl); + + if (!SpecDecl) + return; + + // Guaranteed 2 arguments since the rayquery constructor + // automatically creates 2 template args + DXASSERT(SpecDecl->getTemplateArgs().size() == 2, + "else rayquery constructor template args are not 2"); + llvm::APSInt Arg2val = SpecDecl->getTemplateArgs()[1].getAsIntegral(); + bool IsRayQueryAllowOMMSet = + Arg2val.getZExtValue() & + (unsigned)DXIL::RayQueryFlag::AllowOpacityMicromaps; + if (!IsRayQueryAllowOMMSet) { + // Diagnose the call + sema.Diag(CXXCallExpr->getExprLoc(), + diag::warn_hlsl_rayquery_flags_conflict); + sema.Diag(DRE->getDecl()->getLocation(), diag::note_previous_decl) + << "RayQueryFlags"; + } + } +} + static bool isStringLiteral(QualType type) { if (!type->isConstantArrayType()) return false; @@ -11529,6 +11997,35 @@ static bool isStringLiteral(QualType type) { return eType->isSpecificBuiltinType(BuiltinType::Char_S); } +static void DiagnoseReachableSERCall(Sema &S, CallExpr *CE, + DXIL::ShaderKind EntrySK, + const FunctionDecl *EntryDecl, + bool IsReorderOperation) { + bool ValidEntry = false; + switch (EntrySK) { + default: + break; + case DXIL::ShaderKind::ClosestHit: + case DXIL::ShaderKind::Miss: + ValidEntry = !IsReorderOperation; + break; + case DXIL::ShaderKind::RayGeneration: + ValidEntry = true; + break; + } + + if (ValidEntry) + return; + + int DiagID = IsReorderOperation ? 
diag::err_hlsl_reorder_unsupported_stage + : diag::err_hlsl_hitobject_unsupported_stage; + + SourceLocation EntryLoc = EntryDecl->getLocation(); + SourceLocation Loc = CE->getExprLoc(); + S.Diag(Loc, DiagID) << ShaderModel::FullNameFromKind(EntrySK); + S.Diag(EntryLoc, diag::note_hlsl_entry_defined_here); +} + // Check HLSL member call constraints for used functions. // locallyVisited is true if this call has been visited already from any other // entry function. Used to avoid duplicate diagnostics when not dependent on @@ -11566,6 +12063,16 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, DiagnoseReachableBarrier(*this, CE, SM, EntrySK, NodeLaunchTy, EntryDecl, Diags); break; + case hlsl::IntrinsicOp::MOP_TraceRayInline: + DiagnoseTraceRayInline(*this, CE); + break; + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeMiss: + case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); + break; + case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: + DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, true); + break; default: break; } @@ -11581,10 +12088,18 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, HLSLExternalSource *source = HLSLExternalSource::FromSema(self); ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); switch (shapeKind) { + case AR_TOBJ_VECTOR: + if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { + const unsigned NodeRecordsIdx = 3; + self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) + << NodeRecordsIdx; + Empty = false; + return false; + } + LLVM_FALLTHROUGH; case AR_TOBJ_ARRAY: case AR_TOBJ_BASIC: case AR_TOBJ_MATRIX: - case AR_TOBJ_VECTOR: Empty = false; return false; case AR_TOBJ_OBJECT: @@ -11603,14 +12118,15 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, bool ErrorFound = false; const RecordDecl *RD = ArgTy->getAs()->getDecl(); // Check the fields of 
the RecordDecl - RecordDecl::field_iterator begin = RD->field_begin(); - RecordDecl::field_iterator end = RD->field_end(); - while (begin != end) { - const FieldDecl *FD = *begin; + for (auto *FD : RD->fields()) ErrorFound |= DiagnoseNodeStructArgument(self, ArgLoc, FD->getType(), Empty, FD); - begin++; - } + if (RD->isCompleteDefinition()) + if (auto *Child = dyn_cast(RD)) + // Walk up the inheritance chain and check base class fields + for (auto &B : Child->bases()) + ErrorFound |= + DiagnoseNodeStructArgument(self, ArgLoc, B.getType(), Empty); return ErrorFound; } default: @@ -12046,6 +12562,21 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } +bool hlsl::ContainsLongVector(QualType QT) { + if (QT.isNull() || QT->isDependentType()) + return false; + + while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) + QT = Arr->getElementType(); + + if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { + if (!Decl->isCompleteDefinition()) + return false; + return Decl->hasHLSLLongVector(); + } + return false; +} + bool hlsl::IsConversionToLessOrEqualElements( clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, bool explicitConversion) { @@ -13158,8 +13689,9 @@ ValidateMaxRecordsSharedWithAttributes(Sema &S, Decl *D, void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { HLSLExternalSource *ExtSource = HLSLExternalSource::FromSema(this); - if (const HLSLGloballyCoherentAttr *HLSLGCAttr = - dyn_cast(A)) { + const bool IsGCAttr = isa(A); + const bool IsRCAttr = isa(A); + if (IsGCAttr || IsRCAttr) { const ValueDecl *TD = cast(D); if (TD->getType()->isDependentType()) return; @@ -13168,23 +13700,25 @@ void Sema::DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A) { DeclType = FD->getReturnType(); while (DeclType->isArrayType()) DeclType = QualType(DeclType->getArrayElementTypeNoTypeQual(), 0); + const bool IsAllowedNodeIO = + IsGCAttr && + GetNodeIOType(DeclType) == 
DXIL::NodeIOKind::RWDispatchNodeInputRecord; + const bool IsUAV = + hlsl::GetResourceClassForType(getASTContext(), DeclType) == + hlsl::DXIL::ResourceClass::UAV; if (ExtSource->GetTypeObjectKind(DeclType) != AR_TOBJ_OBJECT || - (hlsl::GetResourceClassForType(getASTContext(), DeclType) != - hlsl::DXIL::ResourceClass::UAV && - GetNodeIOType(DeclType) != - DXIL::NodeIOKind::RWDispatchNodeInputRecord)) { + (!IsUAV && !IsAllowedNodeIO)) { Diag(A->getLocation(), diag::err_hlsl_varmodifierna_decltype) << A << DeclType->getCanonicalTypeUnqualified() << A->getRange(); - Diag(A->getLocation(), diag::note_hlsl_globallycoherent_applies_to) - << A << A->getRange(); + Diag(A->getLocation(), diag::note_hlsl_coherence_applies_to) + << (int)IsGCAttr << A << A->getRange(); } return; } } -void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, - QualType TargetType, - SourceLocation Loc) { +void Sema::DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, + SourceLocation Loc) { QualType SrcTy = SrcExpr->getType(); QualType DstTy = TargetType; if (SrcTy->isArrayType() && DstTy->isArrayType()) { @@ -13196,9 +13730,39 @@ void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, GetNodeIOType(DstTy) == DXIL::NodeIOKind::RWDispatchNodeInputRecord) { bool SrcGL = hlsl::HasHLSLGloballyCoherent(SrcTy); bool DstGL = hlsl::HasHLSLGloballyCoherent(DstTy); - if (SrcGL != DstGL) - Diag(Loc, diag::warn_hlsl_impcast_glc_mismatch) - << SrcExpr->getType() << TargetType << /*loses|adds*/ DstGL; + // 'reordercoherent' attribute dropped earlier in presence of + // 'globallycoherent' + bool SrcRD = hlsl::HasHLSLReorderCoherent(SrcTy); + bool DstRD = hlsl::HasHLSLReorderCoherent(DstTy); + + enum { + NoMismatch = -1, + DemoteToRD = 0, + PromoteToGL = 1, + LosesRD = 2, + LosesGL = 3, + AddsRD = 4, + AddsGL = 5 + } MismatchType = NoMismatch; + + if (SrcGL && DstRD) + MismatchType = DemoteToRD; + else if (SrcRD && DstGL) + MismatchType = PromoteToGL; + else if (SrcRD && !DstRD) + 
MismatchType = LosesRD; + else if (SrcGL && !DstGL) + MismatchType = LosesGL; + else if (!SrcRD && DstRD) + MismatchType = AddsRD; + else if (!SrcGL && DstGL) + MismatchType = AddsGL; + + if (MismatchType == NoMismatch) + return; + + Diag(Loc, diag::warn_hlsl_impcast_coherence_mismatch) + << SrcExpr->getType() << TargetType << MismatchType; } } @@ -13347,6 +13911,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, declAttr = ::new (S.Context) HLSLGloballyCoherentAttr( A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; + case AttributeList::AT_HLSLReorderCoherent: + declAttr = ::new (S.Context) HLSLReorderCoherentAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; case AttributeList::AT_HLSLIndices: declAttr = ::new (S.Context) HLSLIndicesAttr( A.getRange(), S.Context, A.getAttributeSpellingListIndex()); @@ -13405,6 +13973,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; // SPIRV Change Starts + case AttributeList::AT_VKAliasedPointer: { + declAttr = ::new (S.Context) VKAliasedPointerAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + } break; case AttributeList::AT_VKDecorateIdExt: { if (A.getNumArgs() == 0 || !A.getArg(0).is()) { Handled = false; @@ -14369,6 +14941,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, *pDispatchGrid = nullptr, *pMaxDispatchGrid = nullptr; bool usageIn = false; bool usageOut = false; + bool isGroupShared = false; for (clang::AttributeList *pAttr = D.getDeclSpec().getAttributes().getList(); pAttr != NULL; pAttr = pAttr->getNext()) { @@ -14392,6 +14965,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGroupShared: + isGroupShared = true; if (!isGlobal) { Diag(pAttr->getLoc(), diag::err_hlsl_varmodifierna) << pAttr->getName() << declarationType 
<< pAttr->getRange(); @@ -14405,6 +14979,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGloballyCoherent: // Handled elsewhere + case AttributeList::AT_HLSLReorderCoherent: // Handled elsewhere break; case AttributeList::AT_HLSLUniform: if (!(isGlobal || isParameter)) { @@ -14672,6 +15247,23 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } + // Disallow long vecs from $Global cbuffers. + if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { + // Suppress actual emitting of errors for incompletable types here + // They are redundant to those produced in ActOnUninitializedDecl. + struct SilentDiagnoser : public TypeDiagnoser { + SilentDiagnoser() : TypeDiagnoser(true) {} + virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} + } SD; + RequireCompleteType(D.getLocStart(), qt, SD); + if (ContainsLongVector(qt)) { + unsigned CbuffersOrTbuffersIdx = 4; + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) + << CbuffersOrTbuffersIdx; + result = false; + } + } + // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN // Validate that Vulkan specific feature is only used when targeting SPIR-V @@ -14783,15 +15375,17 @@ static QualType getUnderlyingType(QualType Type) { void hlsl::GetHLSLAttributedTypes( clang::Sema *self, clang::QualType type, const clang::AttributedType **ppMatrixOrientation, - const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC) { + const clang::AttributedType **ppNorm, const clang::AttributedType **ppGLC, + const clang::AttributedType **ppRDC) { AssignOpt(nullptr, ppMatrixOrientation); AssignOpt(nullptr, ppNorm); AssignOpt(nullptr, ppGLC); + AssignOpt(nullptr, ppRDC); // Note: we clear output pointers once set so we can stop searching QualType Desugared = getUnderlyingType(type); const AttributedType *AT = dyn_cast(Desugared); - while (AT && (ppMatrixOrientation || ppNorm || 
ppGLC)) { + while (AT && (ppMatrixOrientation || ppNorm || ppGLC || ppRDC)) { AttributedType::Kind Kind = AT->getAttrKind(); if (Kind == AttributedType::attr_hlsl_row_major || @@ -14811,6 +15405,11 @@ void hlsl::GetHLSLAttributedTypes( *ppGLC = AT; ppGLC = nullptr; } + } else if (Kind == AttributedType::attr_hlsl_reordercoherent) { + if (ppRDC) { + *ppRDC = AT; + ppRDC = nullptr; + } } Desugared = getUnderlyingType(AT->getEquivalentType()); @@ -15195,6 +15794,10 @@ void hlsl::CustomPrintHLSLAttr(const clang::Attr *A, llvm::raw_ostream &Out, Out << "globallycoherent "; break; + case clang::attr::HLSLReorderCoherent: + Out << "reordercoherent "; + break; + case clang::attr::HLSLIndices: Out << "indices "; break; @@ -15402,6 +16005,7 @@ bool hlsl::IsHLSLAttr(clang::attr::Kind AttrKind) { case clang::attr::HLSLNodeLocalRootArgumentsTableIndex: case clang::attr::HLSLNodeShareInputOf: case clang::attr::HLSLNodeTrackRWInputSharing: + case clang::attr::HLSLReorderCoherent: case clang::attr::VKBinding: case clang::attr::VKBuiltIn: case clang::attr::VKConstantId: @@ -15560,6 +16164,17 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { return false; } +// Verify that user-defined intrinsic struct args contain no long vectors +static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { + if (ContainsLongVector(Arg->getType())) { + const unsigned UserDefinedStructParameterIdx = 5; + S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) + << UserDefinedStructParameterIdx; + return true; + } + return false; +} + static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 0); @@ -15577,6 +16192,12 @@ static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { auto attr = FDecl->getAttr(); + if (!attr) + return false; + + if (!IsBuiltinTable(attr->getGroup())) + return false; + switch 
(hlsl::IntrinsicOp(attr->getOpcode())) { case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want @@ -15588,6 +16209,22 @@ bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { // existing ones. See the ExtensionTest.EvalAttributeCollision test. assert(FDecl->getName() == "GetAttributeAtVertex"); return CheckIntrinsicGetAttributeAtVertex(this, FDecl, TheCall); + case hlsl::IntrinsicOp::IOP_DispatchMesh: + assert(TheCall->getNumArgs() > 3); + assert(FDecl->getName() == "DispatchMesh"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(3)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_CallShader: + assert(TheCall->getNumArgs() > 1); + assert(FDecl->getName() == "CallShader"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(1)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_TraceRay: + assert(TheCall->getNumArgs() > 7); + assert(FDecl->getName() == "TraceRay"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(7)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_ReportHit: + assert(TheCall->getNumArgs() > 2); + assert(FDecl->getName() == "ReportHit"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(2)->IgnoreCasts()); default: break; } @@ -16268,6 +16905,23 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { return; } + // Check general parameter characteristics + // Would be nice to check for resources here as they crash the compiler now. + // See issue #7186. 
+ for (const auto *param : FD->params()) { + if (ContainsLongVector(param->getType())) { + const unsigned EntryFunctionParametersIdx = 6; + S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) + << EntryFunctionParametersIdx; + } + } + + if (ContainsLongVector(FD->getReturnType())) { + const unsigned EntryFunctionReturnIdx = 7; + S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) + << EntryFunctionReturnIdx; + } + DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); llvm::StringRef StageName = shaderAttr->getStage(); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index cf5d741541..abca7cbf86 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -9,16 +9,24 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/HLSL/HLOperations.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/HlslTypes.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/TypeLoc.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaHLSL.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -142,13 +150,21 @@ class CallGraphWithRecurseGuard { } public: - void BuildForEntry(FunctionDecl *EntryFnDecl) { + void BuildForEntry(FunctionDecl *EntryFnDecl, + llvm::ArrayRef GlobalsWithInit) { DXASSERT_NOMSG(EntryFnDecl); EntryFnDecl = getFunctionWithBody(EntryFnDecl); PendingFunctions pendingFunctions; FnReferenceVisitor visitor(m_visitedFunctions, pendingFunctions, m_callNodes); - 
pendingFunctions.push_back(EntryFnDecl); + + // First, traverse all initializers, then entry function. + m_visitedFunctions.insert(EntryFnDecl); + visitor.setSourceFn(EntryFnDecl); + for (VarDecl *VD : GlobalsWithInit) + visitor.TraverseDecl(VD); + visitor.TraverseDecl(EntryFnDecl); + while (!pendingFunctions.empty()) { FunctionDecl *pendingDecl = pendingFunctions.pop_back_val(); if (m_visitedFunctions.insert(pendingDecl).second == true) { @@ -284,33 +300,67 @@ std::vector GetAllExportedFDecls(clang::Sema *self) { return AllExportedFDecls; } +void GatherGlobalsWithInitializers( + DeclContext *DC, llvm::SmallVectorImpl &GlobalsWithInit, + llvm::SmallVectorImpl &SubObjects) { + for (auto *D : DC->decls()) { + // Skip built-ins and function decls. + if (D->isImplicit() || isa(D)) + continue; + if (auto *VD = dyn_cast(D)) { + // Add if user-defined static or groupshared global with initializer. + if (VD->hasInit() && VD->hasGlobalStorage() && + (VD->getStorageClass() == SC_Static || + VD->hasAttr())) { + // Place subobjects in a separate collection. + if (const RecordType *RT = VD->getType()->getAs()) { + if (RT->getDecl()->hasAttr()) { + SubObjects.push_back(VD); + continue; + } + } + GlobalsWithInit.push_back(VD); + } + } else if (auto *DC = dyn_cast(D)) { + // Recurse into DeclContexts like namespace, cbuffer, class/struct, etc. + GatherGlobalsWithInitializers(DC, GlobalsWithInit, SubObjects); + } + } +} + // in the non-library case, this function will be run only once, // but in the library case, this function will be run for each // viable top-level function declaration by // ValidateNoRecursionInTranslationUnit. 
// (viable as in, is exported) -clang::FunctionDecl *ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, - clang::FunctionDecl *FD) { +clang::FunctionDecl * +ValidateNoRecursion(CallGraphWithRecurseGuard &callGraph, + clang::FunctionDecl *FD, + llvm::ArrayRef GlobalsWithInit) { // Validate that there is no recursion reachable by this function declaration // NOTE: the information gathered here could be used to bypass code generation // on functions that are unreachable (as an early form of dead code // elimination). if (FD) { - callGraph.BuildForEntry(FD); + callGraph.BuildForEntry(FD, GlobalsWithInit); return callGraph.CheckRecursion(FD); } return nullptr; } -class HLSLCallDiagnoseVisitor - : public RecursiveASTVisitor { +class HLSLReachableDiagnoseVisitor + : public RecursiveASTVisitor { public: - explicit HLSLCallDiagnoseVisitor( + explicit HLSLReachableDiagnoseVisitor( Sema *S, const hlsl::ShaderModel *SM, DXIL::ShaderKind EntrySK, DXIL::NodeLaunchType NodeLaunchTy, const FunctionDecl *EntryDecl, - llvm::SmallPtrSetImpl &DiagnosedCalls) + llvm::SmallPtrSetImpl &DiagnosedCalls, + llvm::SmallPtrSetImpl &DeclAvailabilityChecked, + llvm::SmallSet &DiagnosedTypeLocs) : sema(S), SM(SM), EntrySK(EntrySK), NodeLaunchTy(NodeLaunchTy), - EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls) {} + EntryDecl(EntryDecl), DiagnosedCalls(DiagnosedCalls), + DeclAvailabilityChecked(DeclAvailabilityChecked), + DiagnosedTypeLocs(DiagnosedTypeLocs) {} bool VisitCallExpr(CallExpr *CE) { // Set flag if already diagnosed from another entry, allowing some @@ -325,6 +375,126 @@ class HLSLCallDiagnoseVisitor return true; } + bool VisitVarDecl(VarDecl *VD) { + QualType VarType = VD->getType(); + if (const TemplateSpecializationType *TST = + dyn_cast(VarType.getTypePtr())) { + const TemplateDecl *TD = TST->getTemplateName().getAsTemplateDecl(); + if (!TD) + return true; + + // verify this is a rayquery decl + if (TD->getTemplatedDecl()->hasAttr()) { + if (TST->getNumArgs() == 1) { + 
return true; + } + // now guaranteed 2 args + const TemplateArgument &Arg2 = TST->getArg(1); + Expr *Expr2 = Arg2.getAsExpr(); + llvm::APSInt Arg2val; + Expr2->isIntegerConstantExpr(Arg2val, sema->getASTContext()); + + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + sema->getLangOpts().HLSLProfile.c_str()); + + if (Arg2val.getZExtValue() != 0 && !SM->IsSMAtLeast(6, 9)) { + // if it's an integer literal, emit + // warn_hlsl_rayquery_flags_disallowed + if (Arg2.getKind() == TemplateArgument::Expression) { + if (auto *castExpr = dyn_cast( + Arg2.getAsExpr()->IgnoreParens())) { + // Now check if the sub-expression is a DeclRefExpr + Expr *subExpr = castExpr->getSubExpr(); + if (auto *IL = dyn_cast(subExpr)) + sema->Diag(VD->getLocStart(), + diag::warn_hlsl_rayquery_flags_disallowed); + return true; + } + } + } + } + } + return true; + } + + bool VisitTypeLoc(TypeLoc TL) { + // Diagnose availability for used type. + if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(TL)) { + UnqualTypeLoc UTL = TL.getUnqualifiedLoc(); + DiagnoseAvailability(AAttr, TL.getType(), UTL.getLocStart()); + } + + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + // Diagnose availability for referenced decl. + if (AvailabilityAttr *AAttr = GetAvailabilityAttrOnce(DRE)) { + DiagnoseAvailability(AAttr, DRE->getDecl(), DRE->getExprLoc()); + } + + return true; + } + + AvailabilityAttr *GetAvailabilityAttrOnce(TypeLoc TL) { + QualType Ty = TL.getType(); + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + if (!RD) + return nullptr; + AvailabilityAttr *AAttr = RD->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Type. + // Use the end location to avoid diagnosing the same type multiple times. 
+ if (!DiagnosedTypeLocs.insert(TL.getEndLoc()).second) + return nullptr; + + return AAttr; + } + + AvailabilityAttr *GetAvailabilityAttrOnce(DeclRefExpr *DRE) { + AvailabilityAttr *AAttr = DRE->getDecl()->getAttr(); + if (!AAttr) + return nullptr; + // Skip redundant availability diagnostics for the same Decl. + if (!DeclAvailabilityChecked.insert(DRE).second) + return nullptr; + + return AAttr; + } + + bool CheckSMVersion(VersionTuple AAttrVT) { + VersionTuple SMVT = VersionTuple(SM->GetMajor(), SM->GetMinor()); + return SMVT >= AAttrVT; + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, QualType Ty, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + if (CheckSMVersion(AAttrVT)) + return; + + sema->Diag(Loc, diag::warn_hlsl_builtin_type_unavailable) + << Ty << SM->GetName() << AAttrVT.getAsString(); + } + + void DiagnoseAvailability(AvailabilityAttr *AAttr, NamedDecl *ND, + SourceLocation Loc) { + VersionTuple AAttrVT = AAttr->getIntroduced(); + if (CheckSMVersion(AAttrVT)) + return; + + if (isa(ND)) { + sema->Diag(Loc, diag::warn_hlsl_intrinsic_in_wrong_shader_model) + << ND->getQualifiedNameAsString() << EntryDecl + << AAttrVT.getAsString(); + return; + } + + sema->Diag(Loc, diag::warn_hlsl_builtin_constant_unavailable) + << ND << SM->GetName() << AAttrVT.getAsString(); + } + clang::Sema *getSema() { return sema; } private: @@ -334,6 +504,8 @@ class HLSLCallDiagnoseVisitor DXIL::NodeLaunchType NodeLaunchTy; const FunctionDecl *EntryDecl; llvm::SmallPtrSetImpl &DiagnosedCalls; + llvm::SmallPtrSetImpl &DeclAvailabilityChecked; + llvm::SmallSet &DiagnosedTypeLocs; }; std::optional @@ -428,18 +600,38 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { const auto *shaderModel = hlsl::ShaderModel::GetByName(self->getLangOpts().HLSLProfile.c_str()); - std::set DiagnosedDecls; + llvm::SmallVector GlobalsWithInit; + llvm::SmallVector SubObjects; + std::set DiagnosedRecursiveDecls; llvm::SmallPtrSet DiagnosedCalls; + 
llvm::SmallPtrSet DeclAvailabilityChecked; + llvm::SmallSet DiagnosedTypeLocs; + + GatherGlobalsWithInitializers(self->getASTContext().getTranslationUnitDecl(), + GlobalsWithInit, SubObjects); + + if (shaderModel->GetKind() == DXIL::ShaderKind::Library) { + DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, shaderModel->GetKind(), NodeLaunchTy, nullptr, + DiagnosedCalls, DeclAvailabilityChecked, DiagnosedTypeLocs); + for (VarDecl *VD : SubObjects) + Visitor.TraverseDecl(VD); + } + // for each FDecl, check for recursion for (FunctionDecl *FDecl : FDeclsToCheck) { CallGraphWithRecurseGuard callGraph; - FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl); + ArrayRef InitGlobals = {}; + // if entry function, include globals with initializers. + if (FDecl->hasAttr()) + InitGlobals = GlobalsWithInit; + FunctionDecl *result = ValidateNoRecursion(callGraph, FDecl, InitGlobals); if (result) { // don't emit duplicate diagnostics for the same recursive function // if A and B call recursive function C, only emit 1 diagnostic for C. 
- if (DiagnosedDecls.find(result) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(result); + if (DiagnosedRecursiveDecls.insert(result).second) { self->Diag(result->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << FDecl->getQualifiedNameAsString() @@ -463,12 +655,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } if (pPatchFnDecl) { - FunctionDecl *patchResult = ValidateNoRecursion(callGraph, pPatchFnDecl); + FunctionDecl *patchResult = + ValidateNoRecursion(callGraph, pPatchFnDecl, GlobalsWithInit); // In this case, recursion was detected in the patch-constant function if (patchResult) { - if (DiagnosedDecls.find(patchResult) == DiagnosedDecls.end()) { - DiagnosedDecls.insert(patchResult); + if (DiagnosedRecursiveDecls.insert(patchResult).second) { self->Diag(patchResult->getSourceRange().getBegin(), diag::err_hlsl_no_recursion) << pPatchFnDecl->getQualifiedNameAsString() @@ -482,15 +674,12 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { // disconnected with respect to the call graph. 
// Only check this if neither function decl is recursive if (!result && !patchResult) { - CallGraphWithRecurseGuard CG; - CG.BuildForEntry(pPatchFnDecl); - if (CG.CheckReachability(pPatchFnDecl, FDecl)) { + if (callGraph.CheckReachability(pPatchFnDecl, FDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 1 << FDecl->getName() << 0 << pPatchFnDecl->getName(); } - CG.BuildForEntry(FDecl); - if (CG.CheckReachability(FDecl, pPatchFnDecl)) { + if (callGraph.CheckReachability(FDecl, pPatchFnDecl)) { self->Diag(FDecl->getSourceRange().getBegin(), diag::err_hlsl_patch_reachability_not_allowed) << 0 << pPatchFnDecl->getName() << 1 << FDecl->getName(); @@ -520,8 +709,21 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << hullPatchCount.value(); } } - } + for (const auto *param : pPatchFnDecl->params()) + if (ContainsLongVector(param->getType())) { + const unsigned PatchConstantFunctionParametersIdx = 8; + self->Diag(param->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << PatchConstantFunctionParametersIdx; + } + if (ContainsLongVector(pPatchFnDecl->getReturnType())) { + const unsigned PatchConstantFunctionReturnIdx = 9; + self->Diag(pPatchFnDecl->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << PatchConstantFunctionReturnIdx; + } + } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; if (EntrySK == DXIL::ShaderKind::Library) { @@ -537,12 +739,16 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { NodeLaunchTy = DXIL::NodeLaunchType::Broadcasting; } } + // Visit all visited functions in call graph to collect illegal intrinsic // calls. 
- for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) { - HLSLCallDiagnoseVisitor Visitor(self, shaderModel, EntrySK, NodeLaunchTy, - FDecl, DiagnosedCalls); + HLSLReachableDiagnoseVisitor Visitor( + self, shaderModel, EntrySK, NodeLaunchTy, FDecl, DiagnosedCalls, + DeclAvailabilityChecked, DiagnosedTypeLocs); + // Visit globals with initializers when processing entry point. + for (VarDecl *VD : InitGlobals) + Visitor.TraverseDecl(VD); + for (FunctionDecl *FD : callGraph.GetVisitedFunctions()) Visitor.TraverseDecl(FD); - } } } diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 650fe38adc..636eaf0213 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -10936,7 +10936,13 @@ bool Sema::buildOverloadedCallSet(Scope *S, Expr *Fn, ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && ULE->getQualifier()->getAsNamespace()->getName() == "vk"; - assert((!ULE->getQualifier() || isVkNamespace) && "non-vk qualified name with ADL"); + bool isDxNamespace = + ULE->getQualifier() && + ULE->getQualifier()->getKind() == NestedNameSpecifier::Namespace && + ULE->getQualifier()->getAsNamespace()->getName() == "dx"; + + assert((!ULE->getQualifier() || isVkNamespace || isDxNamespace) && + "expected vk or dx qualified name with ADL"); // HLSL Change Ends // We don't perform ADL for implicit declarations of builtins. diff --git a/tools/clang/lib/Sema/SemaStmt.cpp b/tools/clang/lib/Sema/SemaStmt.cpp index ce1e55bb0e..4e47a68888 100644 --- a/tools/clang/lib/Sema/SemaStmt.cpp +++ b/tools/clang/lib/Sema/SemaStmt.cpp @@ -3184,7 +3184,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // HLSL Change begin - Diagnose mismatched globallycoherent attrs on return. 
if (RetValExp) - DiagnoseGloballyCoherentMismatch(RetValExp, FnRetType, ReturnLoc); + DiagnoseCoherenceMismatch(RetValExp, FnRetType, ReturnLoc); // HLSL Change end bool HasDependentReturnType = FnRetType->isDependentType(); diff --git a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index a6ae05faa5..1eacedbb0b 100644 --- a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2139,6 +2139,18 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, SourceLocation(), SourceLocation(), nullptr); CheckCompletedCXXClass(Instantiation); + // HLSL Change Begin - set longvec bit for vectors of over 4 elements + ClassTemplateSpecializationDecl *Spec = + dyn_cast(Instantiation); + if (Spec && Spec->hasAttr()) { + const TemplateArgumentList &argList = Spec->getTemplateArgs(); + const TemplateArgument &arg1 = argList[1]; + llvm::APSInt vecSize = arg1.getAsIntegral(); + if (vecSize.getLimitedValue() > hlsl::DXIL::kDefaultMaxVectorLength) + Instantiation->setHasHLSLLongVector(); + } + // HLSL Change End - set longvec bit for vectors of over 4 elements + // Default arguments are parsed, if not instantiated. We can go instantiate // default arg exprs for default constructors if necessary now. 
ActOnFinishCXXMemberDefaultArgs(Instantiation); diff --git a/tools/clang/lib/Sema/SemaType.cpp b/tools/clang/lib/Sema/SemaType.cpp index 5a8f9d13b3..ff3b0dbac7 100644 --- a/tools/clang/lib/Sema/SemaType.cpp +++ b/tools/clang/lib/Sema/SemaType.cpp @@ -4528,7 +4528,9 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) { return AttributeList::AT_HLSLColumnMajor; case AttributedType::attr_hlsl_globallycoherent: return AttributeList::AT_HLSLGloballyCoherent; - // HLSL Change Ends + case AttributedType::attr_hlsl_reordercoherent: + return AttributeList::AT_HLSLReorderCoherent; + // HLSL Change Ends } llvm_unreachable("unexpected attribute kind!"); } @@ -5771,6 +5773,7 @@ static bool isHLSLTypeAttr(AttributeList::Kind Kind) { case AttributeList::AT_HLSLSnorm: case AttributeList::AT_HLSLUnorm: case AttributeList::AT_HLSLGloballyCoherent: + case AttributeList::AT_HLSLReorderCoherent: return true; default: // Only meant to catch attr handled by handleHLSLTypeAttr, ignore the rest @@ -5802,7 +5805,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, const AttributedType *pMatrixOrientation = nullptr; const AttributedType *pNorm = nullptr; const AttributedType *pGLC = nullptr; - hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC); + const AttributedType *pRDC = nullptr; + hlsl::GetHLSLAttributedTypes(&S, Type, &pMatrixOrientation, &pNorm, &pGLC, + &pRDC); if (pMatrixOrientation && (Kind == AttributeList::AT_HLSLColumnMajor || @@ -5836,13 +5841,18 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, return true; } - if (pGLC && Kind == AttributeList::AT_HLSLGloballyCoherent) { - AttributedType::Kind CurAttrKind = pGLC->getAttrKind(); - if (Kind == getAttrListKind(CurAttrKind)) { - S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) - << Attr.getName() << Attr.getRange(); - } - } + const bool hasGLC = pGLC; + const bool addsGLC = Kind == AttributeList::AT_HLSLGloballyCoherent; + const bool hasRDC = pRDC; + 
const bool addsRDC = Kind == AttributeList::AT_HLSLReorderCoherent; + + const bool hasMismatchingAttrs = hasGLC && hasRDC; + const bool addsMismatchingAttr = (hasGLC && addsRDC) || (hasRDC && addsGLC); + if ((hasGLC && addsGLC) || (hasRDC && addsRDC)) + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) + << Attr.getName() << Attr.getRange(); + else if (!hasMismatchingAttrs && addsMismatchingAttr) + S.Diag(Attr.getLoc(), diag::warn_hlsl_glc_implies_rdc) << Attr.getRange(); AttributedType::Kind TAK; switch (Kind) { @@ -5853,6 +5863,9 @@ static bool handleHLSLTypeAttr(TypeProcessingState &State, case AttributeList::AT_HLSLSnorm: TAK = AttributedType::attr_hlsl_snorm; break; case AttributeList::AT_HLSLGloballyCoherent: TAK = AttributedType::attr_hlsl_globallycoherent; break; + case AttributeList::AT_HLSLReorderCoherent: + TAK = AttributedType::attr_hlsl_reordercoherent; + break; } Type = S.Context.getAttributedType(TAK, Type, Type); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl new file mode 100644 index 0000000000..d92ce7b9ca --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_for_arg.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: %[[uH:[^ ]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?u@@3V?$RWBuffer@M@@A", align 4 +// CHECK: %[[uLIBH:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %[[uH]]) ; CreateHandleForLib(Resource) +// CHECK: %[[uANNOT:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[uLIBH]], %dx.types.ResourceProperties { i32 69642, i32 265 }) ; AnnotateHandle(res,props) resource: reordercoherent RWTypedBuffer +// CHECK: %{{[^ ]+}} = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %[[uANNOT]], i32 0, i32 undef) ; 
BufferLoad(srv,index,wot) + +RWBuffer OutBuf : register(u1); +reordercoherent RWBuffer u : register(u2); + +float read(RWBuffer buf) { + return buf[0]; +} + +[shader("raygeneration")] +void main() { + OutBuf[0] = read(u); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl new file mode 100644 index 0000000000..ea47281d0d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: !"uav1", {{.+}}, ![[TAGMD:[0-9]+]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true + +reordercoherent RWTexture1D uav1 : register(u3); +RWBuffer uav2; + +[shader("raygeneration")] +void main() +{ + reordercoherent RWTexture1D uav3 = uav1; + uav3[0] = 5; + uav1[0] = 2; + uav2[1] = 3; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl new file mode 100644 index 0000000000..8b60c0cd67 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/attributes/reordercoherent_uav_array.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -E main -T lib_6_9 %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// Make sure uav array can have reordercoherent. 
+// CHECK: !{{.*}} = !{i32 1, [12 x %"class.RWTexture2D"]* bitcast ([12 x %dx.types.Handle]* @"\01?tex@@3PAV?$RWTexture2D@M@@A" to [12 x %"class.RWTexture2D"]*), !"tex", i32 0, i32 2, i32 12, i32 2, i1 false, i1 false, i1 false, ![[TAGMD:.*]]} +// CHECK: ![[TAGMD]] = !{i32 0, i32 9, i32 4, i1 true} + + +RWBuffer OutBuf: register(u1); +reordercoherent RWTexture2D tex[12] : register(u2); + +[shader("raygeneration")] +void main() { + int2 c = DispatchRaysIndex().xy; + OutBuf[0] = tex[0][c]; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index e6246845b3..9f7a487a05 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -3,14 +3,34 @@ // RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s + +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s + // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 
-DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT + // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=uint64_t -DCOLS=2 %s | FileCheck %s @@ -26,8 +46,6 @@ // for different aggregate buffer types and indices. 
/////////////////////////////////////////////////////////////////////// - - // CHECK: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]], #if !defined(ATY) @@ -68,6 +86,16 @@ struct OffVector { } }; +template +struct Matrix { + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + ByteAddressBuffer RoByBuf : register(t1); RWByteAddressBuffer RwByBuf : register(u1); @@ -156,6 +184,8 @@ void main(uint ix[2] : IX) { // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -163,6 +193,8 @@ void main(uint ix[2] : IX) { TYPE stbElt1 SS = RwStBuf.Load(ix[0]); // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl new file mode 100644 index 0000000000..03735cb968 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl @@ -0,0 +1,162 @@ +// RUN: %dxc -DTYPE=float -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// Confirm that 6.9 doesn't use vector loads for scalars and vec1s +// RUN: %dxc -DTYPE=float -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64 + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar buffer types and confirm that the proper +// loads, stores, and conversion operations take place. +/////////////////////////////////////////////////////////////////////// + + +// These -DAGs must match the same line. That is the only reason for the -DAG. +// The first match will assign [[TY]] to the native type +// For most runs, the second match will assign [[TY32]] to the same thing.
+// For 64-bit types, the memory representation is i32 and a separate variable is needed. +// For these cases, there is another line that will always match i32. +// This line will also force the previous -DAGs to match the same line since the most +// This shader can produce is two ResRet types. +// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]], +// CHECK-DAG: %dx.types.ResRet.[[TY32:[a-z][0-9][0-9]]] = type { [[TYPE]], +// I64: %dx.types.ResRet.[[TY32:i32]] +// F64: %dx.types.ResRet.[[TY32:i32]] + + ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + + StructuredBuffer< TYPE > RoStBuf : register(t2); +RWStructuredBuffer< TYPE > RwStBuf : register(u2); + + Buffer< TYPE > RoTyBuf : register(t3); +RWBuffer< TYPE > RwTyBuf : register(u3); + +ConsumeStructuredBuffer CnStBuf : register(u4); +AppendStructuredBuffer ApStBuf : register(u5); + +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call 
%dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // 
I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt4 = RoStBuf[ix[1]]; + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); + + // TypedBuffer Tests + // CHECK: [[ANHDLRWTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt1 = RwTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: 
zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt2 = RwTyBuf[ix[1]]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt3 = RoTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt4 = RoTyBuf[ix[1]]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // I64: lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl new file mode 100644 index 0000000000..5305ee495b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -0,0 +1,91 @@ +// RUN: %dxc -DTYPE=float -DNUM=4 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=4 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=2 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=2 -T vs_6_9 %s | FileCheck %s + +// RUN: %dxc -DTYPE=float -DNUM=6 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool 
-DNUM=13 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -DNUM=24 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=double -DNUM=32 -T vs_6_9 %s | FileCheck %s + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar/vector buffer types and indices. +/////////////////////////////////////////////////////////////////////// + +// CHECK: %dx.types.ResRet.[[VTY:v[0-9]*[a-z][0-9][0-9]]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]>, i32 } + +ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + +StructuredBuffer > RoStBuf : register(t2); +RWStructuredBuffer > RwStBuf : register(u2); + +ConsumeStructuredBuffer > CnStBuf : register(u4); +AppendStructuredBuffer > ApStBuf : register(u5); + +// CHECK-LABEL: define void @main +[shader("vertex")] +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call 
i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt1 = RwByBuf.Load< vector >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector babElt2 = RoByBuf.Load< vector >(ix[0]); + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call 
%dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt4 = RoStBuf[ix[1]]; + + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index ea44fef604..8dcf5ead1c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -27,13 +27,20 @@ RWByteAddressBuffer RwByBuf : register(u1); StructuredBuffer< TYPE > RoStBuf : register(t2); RWStructuredBuffer< TYPE > RwStBuf : register(u2); - Buffer< TYPE > RoTyBuf : register(t3); -RWBuffer< TYPE > RwTyBuf : register(u3); +ConsumeStructuredBuffer CnStBuf : register(u3); 
+AppendStructuredBuffer ApStBuf : register(u4); -ConsumeStructuredBuffer CnStBuf : register(u4); -AppendStructuredBuffer ApStBuf : register(u5); + Buffer< TYPE > RoTyBuf : register(t5); +RWBuffer< TYPE > RwTyBuf : register(u5); -void main(uint ix[2] : IX) { + Texture1D< TYPE > RoTex1d : register(t6); +RWTexture1D< TYPE > RwTex1d : register(u6); + Texture2D< TYPE > RoTex2d : register(t7); +RWTexture2D< TYPE > RwTex2d : register(u7); + Texture3D< TYPE > RoTex3d : register(t8); +RWTexture3D< TYPE > RwTex3d : register(u8); + +void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -42,13 +49,27 @@ void main(uint ix[2] : IX) { // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) - // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) - // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 0 }, i32 5, i1 false) + // CHECK-DAG: 
[[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) - // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) - // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + // CHECK-DAG: [[HDLROTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 0 }, i32 6, i1 false) + // CHECK-DAG: [[HDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 1 }, i32 6, i1 false) + // CHECK-DAG: [[HDLROTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 0 }, i32 7, i1 false) + // CHECK-DAG: [[HDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 1 }, i32 7, i1 false) + // CHECK-DAG: [[HDLROTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 0 }, i32 8, i1 false) + // CHECK-DAG: [[HDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 1 }, i32 8, i1 false) - // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK-DAG: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0 + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0 + // CHECK-DAG: [[IX20:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0 + // CHECK-DAG: [[IX21:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1 + // CHECK-DAG: [[IX30:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 0 + // CHECK-DAG: [[IX31:%.*]] = call i32 
@dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 1 + // CHECK-DAG: [[IX32:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 2 // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] @@ -56,7 +77,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + TYPE babElt1 = RwByBuf.Load< TYPE >(ix0); // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] @@ -64,14 +85,14 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -80,14 +101,13 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + TYPE stbElt1 = RwStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 
// I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt2 = RwStBuf[ix[1]]; + TYPE stbElt2 = RwStBuf[ix1]; // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] @@ -95,20 +115,20 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt3 = RoStBuf.Load(ix[0]); + TYPE stbElt3 = RoStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt4 = RoStBuf[ix[1]]; + TYPE stbElt4 = RoStBuf[ix1]; // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -146,7 +166,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt1 = RwTyBuf.Load(ix[0]); + TYPE typElt1 = RwTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -162,7 +182,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt2 = RwTyBuf[ix[1]]; + TYPE typElt2 = RwTyBuf[ix1]; // CHECK: [[ANHDLROTY:%.*]] = 
call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -179,7 +199,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt3 = RoTyBuf.Load(ix[0]); + TYPE typElt3 = RoTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -195,7 +215,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt4 = RoTyBuf[ix[1]]; + TYPE typElt4 = RoTyBuf[ix1]; // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 @@ -210,5 +230,126 @@ void main(uint ix[2] : IX) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + + // Texture Tests + // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX1]], i32 0, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp 
ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt2 = RwTex1d[ix0]; + + // CHECK: [[ANHDLROTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX2]], i32 0, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + 
// I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt4 = RwTex2d[ix2]; + + // CHECK: [[ANHDLROTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX3]], i32 0, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt6 = RwTex3d[ix3]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // F64: call %dx.types.splitdouble 
@dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: call void @dx.op.textureStore.[[TY32]](i32 67, %dx.types.Handle [[ANHDLRWTX3]], i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + RwTex3d[ix3] = texElt1 + texElt2 + texElt3 + texElt4 + texElt5 + texElt6; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl new file mode 100644 index 0000000000..7cd54e0387 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.hlsl @@ -0,0 +1,152 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer load lowering +// Much of this mirrors buffer-load-store and buffer-agg-load-store + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); + StructuredBuffer< float[2] > ArrBuf : register(t3); + StructuredBuffer< Vector > SVecBuf : register(t4); + StructuredBuffer< float2x2 > MatBuf : register(t5); + StructuredBuffer< Matrix > SMatBuf : register(t6); + +void main(uint ix0 : IX0) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Bab0 = BabBuf.Load< bool2 >(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab1 = (float2)BabBuf.Load< float[2] >(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab2 = BabBuf.Load< Vector >(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, 
%struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab3 = BabBuf.Load< float2x2 >(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Bab4 = BabBuf.Load< Matrix >(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle [[ANHDL]], i32 [[IX]], <2 x float> + BabBuf.Store< float2 >(ix0+5, select(Bab0, Bab1+Bab2, Bab3+Bab4)); + + // CHECK: [[IX:%.*]] = add i32 
{{%.*}}, 0 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld0 = VecBuf.Load(ix0 + 0); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld1 = (float2)ArrBuf.Load(ix0 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld2 
= SVecBuf.Load(ix0 + 2).v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sld3 = MatBuf.Load(ix0 + 3)[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sld4 = SMatBuf.Load(ix0 + 4).m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+5] = select(Sld0, Sld1+Sld2, Sld3+Sld4); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]] + float2 Sss0 = VecBuf[ix0 + 6]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 7 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss1 = (float2)ArrBuf[ix0 + 7]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 8 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 
0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Sss2 = SVecBuf[ix0 + 8].v; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 9 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[SS]], i32 1, i32 3) + float2 Sss3 = MatBuf[ix0 + 9][1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 10 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* 
@"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + // CHECK: [[GEP:%.*]] = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* [[MSS]], i32 0, i32 1 + // CHECK: call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* [[GEP]], i32 1, i32 3) + float2 Sss4 = SMatBuf[ix0 + 10].m[1]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 11 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + VecBuf[ix0+11] = select(Sss0, Sss1+Sss2, Sss3+Sss4); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll new file mode 100644 index 0000000000..6b01120f7b --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load.ll @@ -0,0 +1,404 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.StructuredBuffer" = type { [2 x float] } +%"class.StructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.StructuredBuffer >" = type { %class.matrix.float.2.2 } 
+%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.StructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" = external global %"class.StructuredBuffer", align 4 +@"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.StructuredBuffer >", align 8 +@"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4 +@"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.StructuredBuffer >", align 4 + +; Function Attrs: nounwind +define void @main(i32 %ix0) #0 { + %1 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Booleans require some conversion after being loaded + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %10, i32 %7, i32 undef, i8 3, i32 4) + ; CHECK: [[EL0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK: [[EL1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[EL0]], i64 0 + ; CHECK: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[EL1]], i64 1 + ; CHECK: {{%.*}} = icmp ne <2 x i32> [[VEC1]], zeroinitializer + %2 = call %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %1) + %3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %4 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %3, i32 %ix0) + %5 = zext <2 x i1> %4 to <2 x i32> + %6 = add i32 %ix0, 1 + %7 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Array loads do so one element at a time. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %7) + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %10 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %9, i32 %6) + + %11 = 
getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 0 + %12 = load float, float* %11 + %13 = getelementptr inbounds [2 x float], [2 x float]* %10, i32 0, i32 1 + %14 = load float, float* %13 + %15 = insertelement <2 x float> undef, float %12, i32 0 + %16 = insertelement <2 x float> %15, float %14, i32 1 + %17 = add i32 %ix0, 3 + %18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Vector inside a struct is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %18) + %20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %21 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %20, i32 %17) + %22 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %21, i32 0, i32 2 + %23 = load <2 x float>, <2 x float>* %22, align 4 + %24 = add i32 %ix0, 4 + %25 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %26 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %25) + %27 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %26, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %28 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %27, i32 %24) + %row2col = shufflevector <4 x float> %28, <4 x float> %28, <4 x i32> + %29 = shufflevector <4 x float> %row2col, <4 x float> %row2col, <2 x i32> + %30 = add i32 %ix0, 5 + %31 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 
@dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 undef, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %32 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %31) + %33 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %32, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) + %34 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %33, i32 %30) + %35 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %34, i32 0, i32 1 + %36 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %35, i32 1, i32 3) + %37 = load <2 x float>, <2 x float>* %36 + %38 = fadd <2 x float> %29, %37 + %39 = fadd <2 x float> %16, %23 + %40 = icmp ne <2 x i32> %5, zeroinitializer + %41 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %40, <2 x float> %39, <2 x float> %38) + %42 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" + + %43 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %42) + %44 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %43, %dx.types.ResourceProperties { i32 4107, i32 0 }, 
%struct.RWByteAddressBuffer zeroinitializer) + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %44, i32 %ix0, <2 x float> %41) + %45 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %45) + %47 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %46, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %48 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %47, i32 %ix0) + %49 = add i32 %ix0, 1 + %50 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %51 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %50) + %52 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %51, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %53 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %52, i32 %49) + %54 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 0 + %55 = load float, float* %54 + %56 = getelementptr inbounds [2 x float], [2 x float]* %53, i32 0, i32 1 + %57 = load float, float* %56 + %58 = insertelement <2 x float> undef, float %55, i32 0 + %59 = insertelement <2 x float> %58, float %57, i32 1 + %60 = add i32 %ix0, 3 + %61 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct is a simple load. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %62 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %61) + %63 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %62, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %64 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %63, i32 %60) + %65 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %64, i32 0, i32 2 + %66 = load <2 x float>, <2 x float>* %65, align 4 + %67 = add i32 %ix0, 4 + %68 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; 2x2 matrix loads the full storage vector and converts the orientation. 
+ ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 15, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 2 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 3 + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %71 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %70, i32 %67) + %row2col1 = shufflevector <4 x float> %71, <4 x float> %71, <4 x i32> + %72 = shufflevector <4 x float> %row2col1, <4 x float> %row2col1, <2 x i32> + %73 = add i32 %ix0, 5 + %74 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %75 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %74) + %76 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %75, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %77 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %76, i32 %73) + %78 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %77, i32 0, i32 1 + %79 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %78, i32 1, i32 3) + %80 = load <2 x float>, <2 x float>* %79 + %81 = fadd <2 x float> %72, %80 + %82 = fadd <2 x float> %59, %66 + %83 = fcmp une <2 x float> %48, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %84 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %83, <2 x float> %82, <2 x float> %81) + %85 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* 
@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + ; Normal vector. Standard load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %86 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %85) + %87 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %86, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %88 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %87, i32 %ix0) + store <2 x float> %84, <2 x float>* %88 + %89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %89) + %91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %90, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %92 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %91, i32 %ix0) + %93 = 
load <2 x float>, <2 x float>* %92 + %94 = add i32 %ix0, 1 + %95 = load %"class.StructuredBuffer", %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A" + + ; Array loads do so one element at a time. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer"(i32 160, %"class.StructuredBuffer" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 12, i32 8 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 0, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %96 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32 0, %"class.StructuredBuffer" %95) + %97 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32 14, %dx.types.Handle %96, %dx.types.ResourceProperties { i32 12, i32 8 }, %"class.StructuredBuffer" zeroinitializer) + %98 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %97, i32 %94) + %99 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 0 + %100 = load float, float* %99 + %101 = getelementptr inbounds [2 x float], [2 x float]* %98, i32 0, i32 1 + %102 = load float, float* %101 + %103 = insertelement <2 x float> undef, float %100, i32 0 + %104 = insertelement <2 x float> %103, float %102, i32 1 + %105 = add i32 %ix0, 3 + %106 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A" + + ; Vector inside a struct 
is a simple load. + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 780, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 24, i8 3, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 1 + %107 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %106) + %108 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %107, %dx.types.ResourceProperties { i32 780, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %109 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %108, i32 %105) + %110 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %109, i32 0, i32 2 + %111 = load <2 x float>, <2 x float>* %110, align 4 + %112 = add i32 %ix0, 4 + %113 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A" + + ; Subscripted matrices get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 16 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 
{{%.*}}, i32 4, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 12, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %113) + %115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %114, %dx.types.ResourceProperties { i32 524, i32 16 }, %"class.StructuredBuffer >" zeroinitializer) + %116 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %115, i32 %112) + %117 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %116, i32 1, i32 3) + %118 = load <2 x float>, <2 x float>* %117 + %119 = add i32 %ix0, 5 + %120 = load %"class.StructuredBuffer >", %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A" + + ; Matrix struct members get their elements extracted with individual loads on account of already dealing with GEPs + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.StructuredBuffer >"(i32 160, %"class.StructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 524, i32 32 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDL]], i32 {{%.*}}, i32 20, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle 
[[ANHDL]], i32 {{%.*}}, i32 28, i8 1, i32 4) + ; CHECK: {{%.*}} = extractvalue %dx.types.ResRet.f32 [[LD]], 0 + %121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32 0, %"class.StructuredBuffer >" %120) + %122 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32 14, %dx.types.Handle %121, %dx.types.ResourceProperties { i32 524, i32 32 }, %"class.StructuredBuffer >" zeroinitializer) + %123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %122, i32 %119) + %124 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %123, i32 0, i32 1 + %125 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32 1, %class.matrix.float.2.2* %124, i32 1, i32 3) + %126 = load <2 x float>, <2 x float>* %125 + %127 = fadd <2 x float> %118, %126 + %128 = fadd <2 x float> %104, %111 + %129 = fcmp une <2 x float> %93, zeroinitializer + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDL]] + %130 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %129, <2 x float> %128, <2 x float> %127) + %131 = add i32 %ix0, 1 + %132 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + + %133 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %132) + %134 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %133, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %135 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %134, i32 %131) + store <2 x float> %130, <2 x float>* %135 + ret void +} + +declare <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.2.2*, i32, i32)"(i32, %class.matrix.float.2.2*, i32, i32) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #1 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer\22)"(i32, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.StructuredBuffer >\22)"(i32, %"class.StructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.StructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.StructuredBuffer >") #1 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare 
%"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} +!dx.entryPoints = !{!50} +!dx.fnprops = !{!63} +!dx.options = !{!64, !65} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.StructuredBuffer" undef, !12, %"class.StructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.StructuredBuffer >" undef, !29, %"class.StructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 
= !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} +!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{!52, !60, null, null} +!52 = !{!53, !55, !57, !59} +!53 = !{i32 0, %"class.StructuredBuffer"* @"\01?ArrBuf@@3V?$StructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i32 0, !54} +!54 = !{i32 1, i32 8} +!55 = !{i32 1, %"class.StructuredBuffer >"* @"\01?SVecBuf@@3V?$StructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i32 0, !56} +!56 = !{i32 1, i32 32} +!57 = !{i32 2, %"class.StructuredBuffer >"* @"\01?MatBuf@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i32 0, !58} +!58 = !{i32 1, i32 16} +!59 = !{i32 3, %"class.StructuredBuffer >"* @"\01?SMatBuf@@3V?$StructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i32 0, !56} +!60 = !{!61, !62} +!61 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!62 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !54} +!63 = !{void (i32)* @main, i32 1} +!64 = !{i32 64} +!65 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl new file mode 100644 index 
0000000000..fa070ceca5 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.hlsl @@ -0,0 +1,192 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for buffer store lowering + +template +struct Vector { + float4 pad1; + double pad2; + vector v; + Vector operator+(Vector vec) { + Vector ret; + ret.pad1 = 0.0; + ret.pad2 = 0.0; + ret.v = v + vec.v; + return ret; + } +}; + +template +struct Matrix { + float4 pad1; + matrix m; + Matrix operator+(Matrix mat) { + Matrix ret; + ret.m = m + mat.m; + return ret; + } +}; + +RWByteAddressBuffer BabBuf : register(u1); +RWStructuredBuffer< float2 > VecBuf : register(u2); +RWStructuredBuffer< float[2] > ArrBuf : register(u3); +RWStructuredBuffer< Vector > SVecBuf : register(u4); +RWStructuredBuffer< float2x2 > MatBuf : register(u5); +RWStructuredBuffer< Matrix > SMatBuf : register(u6); + +ConsumeStructuredBuffer< float2 > CVecBuf : register(u7); +ConsumeStructuredBuffer< float[2] > CArrBuf : register(u8); +ConsumeStructuredBuffer< Vector > CSVecBuf : register(u9); +ConsumeStructuredBuffer< float2x2 > CMatBuf : register(u10); +ConsumeStructuredBuffer< Matrix > CSMatBuf : register(u11); + +AppendStructuredBuffer< float2 > AVecBuf : register(u12); +AppendStructuredBuffer< float[2] > AArrBuf : register(u13); +AppendStructuredBuffer< Vector > ASVecBuf : register(u14); +AppendStructuredBuffer< float2x2 > AMatBuf : register(u15); +AppendStructuredBuffer< Matrix > ASMatBuf : register(u16); + +void main(uint ix0 : IX0) { + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 
4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], <2 x i1> + BabBuf.Store(ix0 + 1, BabBuf.Load< bool2 >(ix0 + 0)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], 
%dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, [2 x float]*)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], [2 x float] + BabBuf.Store(ix0 + 2, BabBuf.Load< float[2] >(ix0 + 1)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Vector\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Vector" + BabBuf.Store >(ix0 + 3, BabBuf.Load< Vector >(ix0 + 2)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call %class.matrix.float.2.2 @"dx.hl.op.ro.%class.matrix.float.2.2 (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %class.matrix.float.2.2)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]], %class.matrix.float.2.2 + BabBuf.Store(ix0 + 4, BabBuf.Load< float2x2 >(ix0 + 3)); + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %struct.RWByteAddressBuffer + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, %\22struct.Matrix\22*)"(i32 277, %dx.types.Handle [[anhdl]], i32 [[ix]], %"struct.Matrix" + BabBuf.Store >(ix0 + 5, BabBuf.Load< Matrix >(ix0 + 4)); + + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]] + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + VecBuf[ix0 + 1] = VecBuf[ix0 + 0]; + 
+ // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" undef) + // CHECK: call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + ArrBuf[ix0 + 2] = ArrBuf[ix0 + 1]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 
{{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SVecBuf[ix0 + 3] = SVecBuf[ix0 + 2]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[SS:%.*]] = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" 
undef) + // CHECK: call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + MatBuf[ix0 + 4] = MatBuf[ix0 + 3]; + + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: [[MSS:%.*]] = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ix:%.*]] = add i32 {{%.*}}, 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 {{[0-9]*}}, %"class.RWStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" undef) + // CHECK: call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 {{[0-9]*}}, %dx.types.Handle [[anhdl]], i32 [[ix]]) + SMatBuf[ix0 + 5] = SMatBuf[ix0 + 4]; + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call <2 x float> @"dx.hl.op..consume<2 x float> (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, <2 x float>)"(i32 226, %dx.types.Handle [[anhdl]], <2 x float> [[cn]]) + AVecBuf.Append(CVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" undef) + // CHECK: [[cn:%.*]] = call [2 x float]* @"dx.hl.op..consume[2 x float]* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { 
i32 4108, i32 8 }, %"class.AppendStructuredBuffer" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, [2 x float]*)"(i32 226, %dx.types.Handle [[anhdl]], [2 x float]* + AArrBuf.Append(CArrBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Vector"* @"dx.hl.op..consume%\22struct.Vector\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Vector\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Vector"* + ASVecBuf.Append(CSVecBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, 
%"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %class.matrix.float.2.2 @"dx.hl.op..consume%class.matrix.float.2.2 (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %class.matrix.float.2.2)"(i32 226, %dx.types.Handle [[anhdl]], %class.matrix.float.2.2 [[cn]]) + AMatBuf.Append(CMatBuf.Consume()); + + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" undef) + // CHECK: [[cn:%.*]] = call %"struct.Matrix"* @"dx.hl.op..consume%\22struct.Matrix\22* (i32, %dx.types.Handle)"(i32 283, %dx.types.Handle [[anhdl]]) + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }, 
%"class.AppendStructuredBuffer >" undef) + // CHECK: call void @"dx.hl.op..appendvoid (i32, %dx.types.Handle, %\22struct.Matrix\22*)"(i32 226, %dx.types.Handle [[anhdl]], %"struct.Matrix"* + ASMatBuf.Append(CSMatBuf.Consume()); + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll new file mode 100644 index 0000000000..540ab85819 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-store.ll @@ -0,0 +1,822 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"class.RWStructuredBuffer" = type { [2 x float] } +%"class.RWStructuredBuffer >" = type { %"struct.Vector" } +%"struct.Vector" = type { <4 x float>, double, <2 x float> } +%"class.RWStructuredBuffer >" = type { %class.matrix.float.2.2 } +%class.matrix.float.2.2 = type { [2 x <2 x float>] } +%"class.RWStructuredBuffer >" = type { %"struct.Matrix" } +%"struct.Matrix" = type { <4 x float>, %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { <2 x float> } +%"class.ConsumeStructuredBuffer" = type { [2 x float] } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Vector" } +%"class.ConsumeStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.ConsumeStructuredBuffer >" = type { %"struct.Matrix" } +%"class.AppendStructuredBuffer >" = type { <2 x float> } +%"class.AppendStructuredBuffer" = type { [2 x float] } +%"class.AppendStructuredBuffer >" = type { %"struct.Vector" } +%"class.AppendStructuredBuffer >" = type { %class.matrix.float.2.2 } +%"class.AppendStructuredBuffer >" = type { %"struct.Matrix" } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?BabBuf@@3URWByteAddressBuffer@@A" = external 
global %struct.RWByteAddressBuffer, align 4 +@"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" = external global %"class.ConsumeStructuredBuffer", align 4 +@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 8 +@"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.ConsumeStructuredBuffer >", align 4 +@"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" = external global %"class.AppendStructuredBuffer", align 4 +@"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 8 +@"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 +@"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" = external global %"class.AppendStructuredBuffer >", align 4 + +; CHECK-LABEL: define void @main(i32 %ix0) +; Function Attrs: nounwind +define void @main(i32 %ix0) #0 { +bb: + ; CHECK: 
[[pix:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + + %tmp = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:32 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp) ; line:60 col:32 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:32 + %tmp3 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp2, i32 %ix0) ; line:60 col:32 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[vec2:%.*]] = zext 
<2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 undef, i32 undef, i8 3, i32 4) + %tmp4 = add i32 %ix0, 1 ; line:60 col:27 + %tmp5 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:60 col:3 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp5) ; line:60 col:3 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:60 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32 277, %dx.types.Handle %tmp7, i32 %tmp4, <2 x i1> %tmp3) ; line:60 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 4, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp8 = add 
i32 %ix0, 1 ; line:70 col:63 + %tmp9 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:35 + %tmp10 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp9) ; line:70 col:35 + %tmp11 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp10, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:35 + %tmp12 = call [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp11, i32 %tmp8) ; line:70 col:35 + %tmp13 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 0 ; line:70 col:3 + %tmp14 = load float, float* %tmp13 ; line:70 col:3 + %tmp15 = getelementptr inbounds [2 x float], [2 x float]* %tmp12, i32 0, i32 1 ; line:70 col:3 + %tmp16 = load float, float* %tmp15 ; line:70 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 4 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp17 = add i32 %ix0, 2 ; line:70 col:30 + %tmp18 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:70 col:3 + %tmp19 = 
call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp18) ; line:70 col:3 + %tmp20 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp17, float %tmp14) ; line:70 col:3 + %tmp21 = add i32 %tmp17, 4 ; line:70 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %tmp20, i32 %tmp21, float %tmp16) ; line:70 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = 
call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 1, i32 4) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[lix:%.*]] = add i32 24, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp22 = add i32 %ix0, 2 ; line:80 col:78 + %tmp23 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:43 + %tmp24 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp23) ; line:80 col:43 + %tmp25 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp24, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:43 + %tmp26 = call %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp25, i32 %tmp22) ; line:80 col:43 + %tmp27 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 0 ; line:80 col:3 + %tmp28 = load <4 x float>, <4 x float>* %tmp27 ; line:80 col:3 + %tmp29 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 1 ; line:80 col:3 + %tmp30 = load double, double* %tmp29 ; line:80 col:3 + %tmp31 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp26, i32 0, i32 2 ; line:80 col:3 + %tmp32 = load <2 x float>, <2 x float>* %tmp31 ; 
line:80 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, double [[dval]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 24 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp33 = add i32 %ix0, 3 ; line:80 col:38 + %tmp34 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:80 col:3 + %tmp35 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp34) ; line:80 col:3 + %tmp36 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp35, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:80 col:3 + call void 
@"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp33, <4 x float> %tmp28) ; line:80 col:3 + %tmp37 = add i32 %tmp33, 16 ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, double)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp37, double %tmp30) ; line:80 col:3 + %tmp38 = add i32 %tmp33, 24 ; line:80 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32 277, %dx.types.Handle %tmp36, i32 %tmp38, <2 x float> %tmp32) ; line:80 col:3 + + + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[rvec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp39 = add i32 %ix0, 3 ; line:90 col:63 + %tmp40 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:35 + %tmp41 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp40) ; line:90 col:35 + %tmp42 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp41, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:35 + %tmp43 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp42, i32 %tmp39) ; line:90 col:35 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp44 = add i32 %ix0, 4 ; line:90 col:30 + %row2col = shufflevector <4 x float> %tmp43, <4 x float> %tmp43, <4 x i32> ; line:90 col:3 + %tmp45 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:90 col:3 + %tmp46 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp45) ; line:90 col:3 + %tmp47 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp46, %dx.types.ResourceProperties { i32 4107, i32 0 }, 
%struct.RWByteAddressBuffer zeroinitializer) ; line:90 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp47, i32 %tmp44, <4 x float> %row2col) ; line:90 col:3 + + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[lix:%.*]] = add i32 0, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[lix:%.*]] = add i32 16, [[ix]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[lix]], i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 
1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp48 = add i32 %ix0, 4 ; line:100 col:82 + %tmp49 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:45 + %tmp50 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp49) ; line:100 col:45 + %tmp51 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp50, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:45 + %tmp52 = call %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp51, i32 %tmp48) ; line:100 col:45 + %tmp53 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 0 ; line:100 col:3 + %tmp54 = load <4 x float>, <4 x float>* %tmp53 ; line:100 col:3 + %tmp55 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp52, i32 0, i32 1 ; line:100 col:3 + %tmp56 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp55) ; line:100 col:3 + + ; CHECK: [[ix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4107, i32 0 }) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = 
extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[stix:%.*]] = add i32 [[ix]], 16 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp57 = add i32 %ix0, 5 ; line:100 col:40 + %tmp58 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A" ; line:100 col:3 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp58) ; line:100 col:3 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer) ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp57, <4 x float> %tmp54) ; line:100 col:3 + %tmp61 = add i32 %tmp57, 16 ; line:100 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp60, i32 %tmp61, <4 x float> %tmp56) ; line:100 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 
8 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[pix]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp62 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:21 + %tmp63 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp62) ; line:111 col:21 + %tmp64 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp63, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:21 + %tmp65 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp64, i32 %ix0) ; line:111 col:21 + %tmp66 = load <2 x float>, <2 x float>* %tmp65 ; line:111 col:21 + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp67 = add i32 
%ix0, 1 ; line:111 col:14 + %tmp68 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" ; line:111 col:3 + %tmp69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp68) ; line:111 col:3 + %tmp70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp69, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:111 col:3 + %tmp71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp70, i32 %tmp67) ; line:111 col:3 + store <2 x float> %tmp66, <2 x float>* %tmp71 ; line:111 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4108, i32 8 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 4, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 4, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + %tmp72 = add i32 %ix0, 2 ; line:121 col:14 + %tmp73 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:3 + %tmp74 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp73) ; line:121 col:3 + %tmp75 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp74, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:3 + %tmp76 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp75, i32 %tmp72) ; line:121 col:3 + %tmp77 = add i32 %ix0, 1 ; line:121 col:32 + %tmp78 = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A" ; line:121 col:21 + %tmp79 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32 0, %"class.RWStructuredBuffer" %tmp78) ; line:121 col:21 + %tmp80 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp79, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer" zeroinitializer) ; line:121 col:21 + %tmp81 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp80, i32 %tmp77) ; line:121 
col:21 + %tmp82 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 0 ; line:121 col:21 + %tmp83 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 0 ; line:121 col:21 + %tmp84 = load float, float* %tmp83 ; line:121 col:21 + store float %tmp84, float* %tmp82 ; line:121 col:21 + %tmp85 = getelementptr inbounds [2 x float], [2 x float]* %tmp76, i32 0, i32 1 ; line:121 col:21 + %tmp86 = getelementptr inbounds [2 x float], [2 x float]* %tmp81, i32 0, i32 1 ; line:121 col:21 + %tmp87 = load float, float* %tmp86 ; line:121 col:21 + store float %tmp87, float* %tmp85 ; line:121 col:21 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4876, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: 
[[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, double [[val0]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp88 = add i32 %ix0, 3 ; line:131 col:15 + %tmp89 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:3 + %tmp90 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, 
%"class.RWStructuredBuffer >" %tmp89) ; line:131 col:3 + %tmp91 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp90, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:3 + %tmp92 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp91, i32 %tmp88) ; line:131 col:3 + %tmp93 = add i32 %ix0, 2 ; line:131 col:34 + %tmp94 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A" ; line:131 col:22 + %tmp95 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp94) ; line:131 col:22 + %tmp96 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp95, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:131 col:22 + %tmp97 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp96, i32 %tmp93) ; line:131 col:22 + %tmp98 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 0 ; line:131 col:22 + %tmp99 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 0 ; line:131 col:22 + %tmp100 = load <4 x float>, <4 x float>* %tmp99 ; line:131 col:22 + store <4 x float> %tmp100, <4 x float>* %tmp98 ; line:131 col:22 + %tmp101 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 1 ; line:131 col:22 + %tmp102 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 1 ; line:131 col:22 + %tmp103 = load double, 
double* %tmp102 ; line:131 col:22 + store double %tmp103, double* %tmp101 ; line:131 col:22 + %tmp104 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp92, i32 0, i32 2 ; line:131 col:22 + %tmp105 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp97, i32 0, i32 2 ; line:131 col:22 + %tmp106 = load <2 x float>, <2 x float>* %tmp105 ; line:131 col:22 + store <2 x float> %tmp106, <2 x float>* %tmp104 ; line:131 col:22 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 16 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = 
extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp107 = add i32 %ix0, 4 ; line:141 col:14 + %tmp108 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:3 + %tmp109 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp108) ; line:141 col:3 + %tmp110 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp109, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:3 + %tmp111 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp110, i32 %tmp107) ; line:141 col:3 + %tmp112 = add i32 %ix0, 3 ; line:141 col:32 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:141 col:21 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:141 col:21 + %tmp115 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:141 col:21 + %tmp116 = call %class.matrix.float.2.2* 
@"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 %tmp112) ; line:141 col:21 + %tmp117 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp116) ; line:141 col:21 + %tmp118 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp111, <4 x float> %tmp117) ; line:141 col:19 + + + ; CHECK: [[stix:%.*]] = add i32 [[pix]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[sthdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[lix:%.*]] = add i32 [[pix]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" + ; CHECK: [[ldhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4620, i32 32 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> 
[[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ldhdl]], i32 [[lix]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[sthdl]], i32 [[stix]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + %tmp119 = add i32 %ix0, 5 ; line:151 col:15 + %tmp120 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:3 + %tmp121 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp120) ; line:151 col:3 + %tmp122 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp121, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:3 + %tmp123 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp122, i32 %tmp119) ; line:151 col:3 + %tmp124 = add i32 %ix0, 4 ; line:151 col:34 + %tmp125 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:151 col:22 + %tmp126 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp125) ; line:151 col:22 + %tmp127 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp126, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:151 col:22 + %tmp128 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp127, i32 %tmp124) ; line:151 col:22 + %tmp129 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 0 ; line:151 col:22 + %tmp130 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 0 ; line:151 col:22 + %tmp131 = load <4 x float>, <4 x float>* %tmp130 ; line:151 col:22 + store <4 x float> %tmp131, <4 x float>* %tmp129 ; line:151 col:22 + %tmp132 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp123, i32 0, i32 1 ; line:151 col:22 + %tmp133 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp128, i32 0, i32 1 ; line:151 col:22 + %tmp134 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, 
%class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp133) ; line:151 col:22 + %tmp135 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp132, <4 x float> %tmp134) ; line:151 col:22 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + %tmp136 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:18 + %tmp137 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp136) ; line:159 col:18 + %tmp138 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp137, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:159 col:18 + %tmp139 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp138) #0 ; line:159 col:18 + %tmp140 = call <2 x float>* 
@"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp138, i32 %tmp139) #0 ; line:159 col:18 + %tmp141 = load <2 x float>, <2 x float>* %tmp140 ; line:159 col:18 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp142 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A" ; line:159 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp142) ; line:159 col:3 + %tmp144 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp143, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:159 col:3 + %tmp145 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp144) #0 ; line:159 col:3 + %tmp146 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp144, i32 %tmp145) #0 ; line:159 col:3 + store <2 x float> %tmp141, <2 x float>* %tmp146 ; line:159 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.op.createHandleForLib.class.ConsumeStructuredBuffer"(i32 160, %"class.ConsumeStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 1, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, i8 1, i32 4) + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + + %tmp147 = load %"class.ConsumeStructuredBuffer", %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A" ; line:167 col:18 + %tmp148 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32 0, %"class.ConsumeStructuredBuffer" %tmp147) ; line:167 col:18 + %tmp149 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp148, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.ConsumeStructuredBuffer" zeroinitializer) ; line:167 col:18 + %tmp150 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp149) #0 ; line:167 col:18 + %tmp151 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp149, i32 %tmp150) #0 ; line:167 col:18 + %tmp152 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 0 ; line:167 col:3 + %tmp153 = load float, float* %tmp152 ; line:167 col:3 + %tmp154 = getelementptr inbounds [2 x float], [2 x float]* %tmp151, i32 0, i32 1 ; line:167 col:3 + 
%tmp155 = load float, float* %tmp154 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer"(i32 160, %"class.AppendStructuredBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 36876, i32 8 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float undef, float undef, float undef, i8 1, i32 4) + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 4, float [[val1]], float undef, float undef, float undef, i8 1, i32 4) + + %tmp156 = load %"class.AppendStructuredBuffer", %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A" ; line:167 col:3 + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32 0, %"class.AppendStructuredBuffer" %tmp156) ; line:167 col:3 + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.AppendStructuredBuffer" zeroinitializer) ; line:167 col:3 + %tmp159 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp158) #0 ; line:167 col:3 + %tmp160 = call [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp158, i32 %tmp159) #0 ; line:167 col:3 + %tmp161 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 0 ; line:167 col:3 + store float %tmp153, float* %tmp161 ; line:167 col:3 + %tmp162 = getelementptr inbounds [2 x float], [2 x float]* %tmp160, i32 0, i32 1 ; line:167 col:3 + store 
float %tmp155, float* %tmp162 ; line:167 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 1, i32 8) + ; CHECK: [[dval:%.*]] = extractvalue %dx.types.ResRet.f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, i8 3, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec2:%.*]] = insertelement <2 x float> [[ping]], float [[val1]], i64 1 + + %tmp163 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* 
@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:19 + %tmp164 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp163) ; line:175 col:19 + %tmp165 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp164, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:175 col:19 + %tmp166 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp165) #0 ; line:175 col:19 + %tmp167 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp165, i32 %tmp166) #0 ; line:175 col:19 + %tmp168 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 0 ; line:175 col:3 + %tmp169 = load <4 x float>, <4 x float>* %tmp168 ; line:175 col:3 + %tmp170 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 1 ; line:175 col:3 + %tmp171 = load double, double* %tmp170 ; line:175 col:3 + %tmp172 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp167, i32 0, i32 2 ; line:175 col:3 + %tmp173 = load <2 x float>, <2 x float>* %tmp172 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37644, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = 
extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, double [[dval]], double undef, double undef, double undef, i8 1, i32 8) + ; CHECK: [[val0:%.*]] = extractelement <2 x float> [[vec2]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x float> [[vec2]], i64 1 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 24, float [[val0]], float [[val1]], float undef, float undef, i8 3, i32 4) + %tmp174 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A" ; line:175 col:3 + %tmp175 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp174) ; line:175 col:3 + %tmp176 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp175, %dx.types.ResourceProperties { i32 4876, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:175 col:3 + %tmp177 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp176) #0 ; line:175 col:3 + %tmp178 = call %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp176, i32 %tmp177) #0 ; line:175 col:3 + %tmp179 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 0 ; line:175 col:3 + store <4 x float> %tmp169, <4 x float>* %tmp179 ; line:175 col:3 + %tmp180 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 
0, i32 1 ; line:175 col:3 + store double %tmp171, double* %tmp180 ; line:175 col:3 + %tmp181 = getelementptr inbounds %"struct.Vector", %"struct.Vector"* %tmp178, i32 0, i32 2 ; line:175 col:3 + store <2 x float> %tmp173, <2 x float>* %tmp181 ; line:175 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[rvec4:%.*]] = shufflevector <4 x float> [[vec4]], <4 x float> [[vec4]], <4 x i32> + %tmp182 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:18 + %tmp183 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp182) ; line:183 col:18 + %tmp184 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp183, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:183 col:18 + %tmp185 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp184) #0 ; line:183 col:18 + %tmp186 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp184, i32 %tmp185) #0 ; line:183 col:18 + %tmp187 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp186) ; line:183 col:18 + %col2row10 = shufflevector <4 x float> %tmp187, <4 x float> %tmp187, <4 x i32> ; line:183 col:18 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 16 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[cvec4:%.*]] = shufflevector <4 x float> [[rvec4]], <4 x float> [[rvec4]], <4 x i32> + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[cvec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[cvec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[cvec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[cvec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp188 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A" ; line:183 col:3 + %tmp189 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" %tmp188) ; line:183 col:3 + %tmp190 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp189, %dx.types.ResourceProperties { i32 4620, i32 16 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:183 col:3 + %tmp191 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp190) #0 ; line:183 col:3 + %tmp192 = call %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp190, i32 %tmp191) #0 ; line:183 col:3 + %row2col11 = shufflevector <4 x float> %col2row10, <4 x float> %col2row10, <4 x i32> ; line:183 col:3 + call void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp192, <4 x float> %row2col11) ; line:183 col:3 + + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.ConsumeStructuredBuffer >"(i32 160, %"class.ConsumeStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 -1) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float 
[[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[vec4:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <4 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[ping:%.*]] = insertelement <4 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[mat:%.*]] = insertelement <4 x float> [[ping]], float [[val3]], i64 3 + %tmp193 = load %"class.ConsumeStructuredBuffer >", %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:19 + %tmp194 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32 0, %"class.ConsumeStructuredBuffer >" %tmp193) ; line:191 col:19 + %tmp195 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp194, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.ConsumeStructuredBuffer >" zeroinitializer) ; line:191 col:19 + %tmp196 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 281, %dx.types.Handle %tmp195) #0 ; line:191 col:19 + %tmp197 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp195, i32 %tmp196) #0 ; line:191 col:19 + %tmp198 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* 
%tmp197, i32 0, i32 0 ; line:191 col:3 + %tmp199 = load <4 x float>, <4 x float>* %tmp198 ; line:191 col:3 + %tmp200 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp197, i32 0, i32 1 ; line:191 col:3 + %tmp201 = call <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32 0, %class.matrix.float.2.2* %tmp200) ; line:191 col:3 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.AppendStructuredBuffer >"(i32 160, %"class.AppendStructuredBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 37388, i32 32 }) + ; CHECK: [[ct:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[anhdl]], i8 1) + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[vec4]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[vec4]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[vec4]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[vec4]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + ; CHECK: [[val0:%.*]] = extractelement <4 x float> [[mat]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <4 x float> [[mat]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <4 x float> [[mat]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <4 x float> [[mat]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[anhdl]], i32 [[ct]], i32 16, float [[val0]], float [[val1]], float [[val2]], float [[val3]] + + %tmp202 = load %"class.AppendStructuredBuffer >", %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A" ; line:191 col:3 + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32 0, %"class.AppendStructuredBuffer >" 
%tmp202) ; line:191 col:3 + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4620, i32 32 }, %"class.AppendStructuredBuffer >" zeroinitializer) ; line:191 col:3 + %tmp205 = call i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32 282, %dx.types.Handle %tmp204) #0 ; line:191 col:3 + %tmp206 = call %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp204, i32 %tmp205) #0 ; line:191 col:3 + %tmp207 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 0 ; line:191 col:3 + store <4 x float> %tmp199, <4 x float>* %tmp207 ; line:191 col:3 + %tmp208 = getelementptr inbounds %"struct.Matrix", %"struct.Matrix"* %tmp206, i32 0, i32 1 ; line:191 col:3 + %tmp209 = call <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32 1, %class.matrix.float.2.2* %tmp208, <4 x float> %tmp201) ; line:191 col:3 + + + ; CHECK: ret void + ret void ; line:193 col:1 +} + +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x i1>)"(i32, %dx.types.Handle, i32, <2 x i1>) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare [2 x float]* @"dx.hl.op.ro.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare %"struct.Vector"* @"dx.hl.op.ro.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 
+declare %"struct.Matrix"* @"dx.hl.op.ro.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare [2 x float]* @"dx.hl.subscript.[].rn.[2 x float]* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer\22)"(i32, %"class.RWStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer") #1 +declare %"struct.Vector"* @"dx.hl.subscript.[].rn.%\22struct.Vector\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %class.matrix.float.2.2* @"dx.hl.subscript.[].rn.%class.matrix.float.2.2* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare %"struct.Matrix"* @"dx.hl.subscript.[].rn.%\22struct.Matrix\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare i32 @"dx.hl.op..i32 (i32, %dx.types.Handle)"(i32, %dx.types.Handle) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer\22)"(i32, %"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%"class.AppendStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer\22)"(i32, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.AppendStructuredBuffer >\22)"(i32, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.AppendStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.AppendStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.ConsumeStructuredBuffer >\22)"(i32, %"class.ConsumeStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.ConsumeStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.ConsumeStructuredBuffer >") #1 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32, %dx.types.Handle, i32, <4 x float>) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, double)"(i32, %dx.types.Handle, i32, double) #0 +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <2 x float>)"(i32, %dx.types.Handle, i32, <2 x float>) #0 +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <4 x float> @"dx.hl.matldst.colLoad.<4 x float> (i32, %class.matrix.float.2.2*)"(i32, %class.matrix.float.2.2*) #2 +declare <4 x float> @"dx.hl.matldst.colStore.<4 x float> (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 +declare void @"dx.hl.matldst.colStore.void (i32, %class.matrix.float.2.2*, <4 x float>)"(i32, %class.matrix.float.2.2*, <4 x float>) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = 
!{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !43} +!dx.entryPoints = !{!50} +!dx.fnprops = !{!72} +!dx.options = !{!73, !74} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 0, %"class.RWStructuredBuffer >" undef, !7, %"class.RWStructuredBuffer" undef, !12, %"class.RWStructuredBuffer >" undef, !16, %"struct.Vector" undef, !21, %"class.RWStructuredBuffer >" undef, !29, %"class.RWStructuredBuffer >" undef, !35, %"struct.Matrix" undef, !39, %"class.ConsumeStructuredBuffer >" undef, !7, %"class.ConsumeStructuredBuffer" undef, !12, %"class.ConsumeStructuredBuffer >" undef, !16, %"class.ConsumeStructuredBuffer >" undef, !29, %"class.ConsumeStructuredBuffer >" undef, !35, %"class.AppendStructuredBuffer >" undef, !7, %"class.AppendStructuredBuffer" undef, !12, %"class.AppendStructuredBuffer >" undef, !16, %"class.AppendStructuredBuffer >" undef, !29, %"class.AppendStructuredBuffer >" undef, !35} +!7 = !{i32 8, !8, !9} +!8 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 0, !10} +!10 = !{!11} +!11 = !{i32 0, <2 x float> undef} +!12 = !{i32 20, !8, !13} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, [2 x float] undef} +!16 = !{i32 32, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, %"struct.Vector" undef} +!21 = !{i32 32, !22, !23, !24, !25} +!22 = !{i32 6, !"pad1", i32 3, i32 0, i32 7, i32 9} +!23 = !{i32 6, !"pad2", i32 3, i32 16, i32 7, i32 10} +!24 = !{i32 6, !"v", i32 3, i32 24, i32 7, i32 9} +!25 = !{i32 0, !26} +!26 = !{!27, !28} +!27 = !{i32 0, float undef} +!28 = !{i32 1, i64 2} +!29 = !{i32 24, !30, !32} +!30 = !{i32 6, !"h", i32 2, !31, i32 3, i32 0, i32 7, i32 9} +!31 = !{i32 2, i32 2, i32 2} +!32 = !{i32 0, !33} +!33 = !{!34} +!34 = !{i32 0, %class.matrix.float.2.2 undef} +!35 = !{i32 40, !17, !36} +!36 = !{i32 0, !37} +!37 = !{!38} +!38 = !{i32 0, %"struct.Matrix" undef} +!39 = !{i32 40, !22, !40, !41} +!40 = !{i32 6, !"m", i32 
2, !31, i32 3, i32 16, i32 7, i32 9} +!41 = !{i32 0, !42} +!42 = !{!27, !28, !28} +!43 = !{i32 1, void (i32)* @main, !44} +!44 = !{!45, !47} +!45 = !{i32 1, !46, !46} +!46 = !{} +!47 = !{i32 0, !48, !49} +!48 = !{i32 4, !"IX0", i32 7, i32 5} +!49 = !{i32 0} +!50 = !{void (i32)* @main, !"main", null, !51, null} +!51 = !{null, !52, null, null} +!52 = !{!53, !54, !56, !57, !59, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71} +!53 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?BabBuf@@3URWByteAddressBuffer@@A", !"BabBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!54 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?VecBuf@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"VecBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!55 = !{i32 1, i32 8} +!56 = !{i32 2, %"class.RWStructuredBuffer"* @"\01?ArrBuf@@3V?$RWStructuredBuffer@$$BY01M@@A", !"ArrBuf", i32 0, i32 3, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!57 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?SVecBuf@@3V?$RWStructuredBuffer@U?$Vector@M$01@@@@A", !"SVecBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!58 = !{i32 1, i32 32} +!59 = !{i32 4, %"class.RWStructuredBuffer >"* @"\01?MatBuf@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A", !"MatBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!60 = !{i32 1, i32 16} +!61 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?SMatBuf@@3V?$RWStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"SMatBuf", i32 0, i32 6, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!62 = !{i32 6, %"class.ConsumeStructuredBuffer >"* @"\01?CVecBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$01@@@@A", !"CVecBuf", i32 0, i32 7, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!63 = !{i32 7, %"class.ConsumeStructuredBuffer"* @"\01?CArrBuf@@3V?$ConsumeStructuredBuffer@$$BY01M@@A", !"CArrBuf", i32 0, i32 8, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!64 = !{i32 8, %"class.ConsumeStructuredBuffer >"* 
@"\01?CSVecBuf@@3V?$ConsumeStructuredBuffer@U?$Vector@M$01@@@@A", !"CSVecBuf", i32 0, i32 9, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!65 = !{i32 9, %"class.ConsumeStructuredBuffer >"* @"\01?CMatBuf@@3V?$ConsumeStructuredBuffer@V?$matrix@M$01$01@@@@A", !"CMatBuf", i32 0, i32 10, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!66 = !{i32 10, %"class.ConsumeStructuredBuffer >"* @"\01?CSMatBuf@@3V?$ConsumeStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"CSMatBuf", i32 0, i32 11, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!67 = !{i32 11, %"class.AppendStructuredBuffer >"* @"\01?AVecBuf@@3V?$AppendStructuredBuffer@V?$vector@M$01@@@@A", !"AVecBuf", i32 0, i32 12, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!68 = !{i32 12, %"class.AppendStructuredBuffer"* @"\01?AArrBuf@@3V?$AppendStructuredBuffer@$$BY01M@@A", !"AArrBuf", i32 0, i32 13, i32 1, i32 12, i1 false, i1 false, i1 false, !55} +!69 = !{i32 13, %"class.AppendStructuredBuffer >"* @"\01?ASVecBuf@@3V?$AppendStructuredBuffer@U?$Vector@M$01@@@@A", !"ASVecBuf", i32 0, i32 14, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!70 = !{i32 14, %"class.AppendStructuredBuffer >"* @"\01?AMatBuf@@3V?$AppendStructuredBuffer@V?$matrix@M$01$01@@@@A", !"AMatBuf", i32 0, i32 15, i32 1, i32 12, i1 false, i1 false, i1 false, !60} +!71 = !{i32 15, %"class.AppendStructuredBuffer >"* @"\01?ASMatBuf@@3V?$AppendStructuredBuffer@U?$Matrix@M$01$01@@@@A", !"ASMatBuf", i32 0, i32 16, i32 1, i32 12, i1 false, i1 false, i1 false, !58} +!72 = !{void (i32)* @main, i32 1} +!73 = !{i32 64} +!74 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl new file mode 100644 index 0000000000..47355d633f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer/texture load 
lowering + +RWBuffer< bool2 > TyBuf : register(u1); +Texture2DMS< bool2 > Tex2dMs : register(t2); + +Texture1D< float2 > Tex1d : register(t3); +Texture2D< float2 > Tex2d : register(t4); +Texture3D< float2 > Tex3d : register(t5); +Texture2DArray< float2 > Tex2dArr : register(t6); + +RWBuffer< float2 > OutBuf : register(u7); + +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3, uint4 ix4 : IX4) { + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 1 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb0 = TyBuf.Load(ix1 + 1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 2 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + bool2 Tyb1 = TyBuf[ix1 + 2]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef), + // CHECK: call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]] + bool2 TxMs0 = Tex2dMs.Load(ix2 + 3, ix1); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" undef) + // CHECK: call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + bool2 TxMs1 = Tex2dMs[ix2 + 4]; + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx1d0 = Tex1d.Load(ix2 + 5); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 6 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + float2 Tx1d1 = Tex1d[ix1 + 6]; + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2d0 = Tex2d.Load(ix3 + 7); + + // CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <2 x i32> [[IX]]) + float2 Tx2d1 = Tex2d[ix2 + 8]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx3d0 = Tex3d.Load(ix4 + 9); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx3d1 = Tex3d[ix3 + 10]; + + // CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle [[ANHDL]], <4 x i32> [[IX]]) + float2 Tx2da0 = Tex2dArr.Load(ix4 + 11); + + // CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" + // CHECK: 
[[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[ANHDL]], <3 x i32> [[IX]]) + float2 Tx2da1 = Tex2dArr[ix3 + 12]; + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 13 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+13] = select(Tyb0, Tx1d0, Tx1d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 14 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+14] = select(Tyb1, Tx2d0, Tx2d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 15 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, 
%"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+15] = select(TxMs0, Tx3d0, Tx3d1); + + // CHECK: [[IX:%.*]] = add i32 {{%.*}}, 16 + // CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" undef) + // CHECK: call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[ANHDL]], i32 [[IX]]) + OutBuf[ix1+16] = select(TxMs1, Tx2da0, Tx2da1); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll new file mode 100644 index 0000000000..3ecb28644c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-load.ll @@ -0,0 +1,346 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <2 x i32> } +%"class.Texture2DMS, 0>" = type { <2 x i32>, %"class.Texture2DMS, 0>::sample_type" } +%"class.Texture2DMS, 0>::sample_type" = type { i32 } +%"class.Texture1D >" = type { <2 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.Texture2D 
>" = type { <2 x float>, %"class.Texture2D >::mips_type" } +%"class.Texture2D >::mips_type" = type { i32 } +%"class.Texture3D >" = type { <2 x float>, %"class.Texture3D >::mips_type" } +%"class.Texture3D >::mips_type" = type { i32 } +%"class.Texture2DArray >" = type { <2 x float>, %"class.Texture2DArray >::mips_type" } +%"class.Texture2DArray >::mips_type" = type { i32 } +%"class.RWBuffer >" = type { <2 x float> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.Texture2DMS, 0>", align 4 +@"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" = external global %"class.Texture1D >", align 4 +@"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" = external global %"class.Texture2D >", align 4 +@"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" = external global %"class.Texture3D >", align 4 +@"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" = external global %"class.Texture2DArray >", align 4 +@"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" = external global %"class.RWBuffer >", align 4 + +; Function Attrs: nounwind +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3, <4 x i32> %ix4) #0 { + ; CHECK: [[PIX:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 1 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + 
; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %1 = add i32 %ix1, 1 + %2 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %2) + %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %5 = call <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %4, i32 %1) + + %6 = zext <2 x i1> %5 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 2 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %7 = add i32 %ix1, 2 + %8 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %9 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %8) + %10 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %9, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %11 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %10, i32 %7) + %12 = load <2 x i32>, <2 x i32>* %11 + + %13 = icmp ne <2 x i32> %12, zeroinitializer + %14 = zext <2 x i1> %13 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[PIX]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %15 = add <2 x i32> %ix2, + %16 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %16) + %18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %17, 
%dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %19 = call <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 231, %dx.types.Handle %18, <2 x i32> %15, i32 %ix1) + %20 = zext <2 x i1> %19 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DMS, 0>"(i32 160, %"class.Texture2DMS, 0>" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 3, i32 517 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: [[LD:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK-DAG: [[V0:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 0 + ; CHECK-DAG: [[V1:%.*]] = extractvalue %dx.types.ResRet.i32 [[LD]], 1 + ; CHECK-DAG: [[VEC0:%.*]] = insertelement <2 x i32> undef, i32 [[V0]], i64 0 + ; CHECK-DAG: [[VEC1:%.*]] = insertelement <2 x i32> [[VEC0]], i32 [[V1]], i64 1 + ; CHECK: icmp ne <2 x i32> [[VEC1]], zeroinitializer + %21 = add <2 x i32> %ix2, + %22 = load %"class.Texture2DMS, 0>", %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A" + %23 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32 0, %"class.Texture2DMS, 0>" %22) + %24 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32 14, %dx.types.Handle %23, %dx.types.ResourceProperties { i32 3, i32 517 }, %"class.Texture2DMS, 0>" zeroinitializer) + %25 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %24, <2 x i32> %21) + 
%26 = load <2 x i32>, <2 x i32>* %25 + + %27 = icmp ne <2 x i32> %26, zeroinitializer + %28 = zext <2 x i1> %27 to <2 x i32> + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX1]], i32 [[IX0]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %29 = add <2 x i32> %ix2, + %30 = load %"class.Texture1D >", %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %30) + %32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %31, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %33 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32 231, %dx.types.Handle %32, <2 x i32> %29) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 6 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture1D >"(i32 160, %"class.Texture1D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 1, i32 521 }) + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + %34 = add i32 %ix1, 6 + %35 = load %"class.Texture1D >", 
%"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A" + %36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32 0, %"class.Texture1D >" %35) + %37 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32 14, %dx.types.Handle %36, %dx.types.ResourceProperties { i32 1, i32 521 }, %"class.Texture1D >" zeroinitializer) + %38 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %37, i32 %34) + %39 = load <2 x float>, <2 x float>* %38 + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX2]], i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %40 = add <3 x i32> %ix3, + %41 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %41) + %43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %42, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %44 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32 231, 
%dx.types.Handle %43, <3 x i32> %40) + + ; CHECK: [[IX:%.*]] = add <2 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2D >"(i32 160, %"class.Texture2D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 2, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <2 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <2 x i32> [[IX]], i64 1 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 undef, i32 undef, i32 undef, i32 undef) + %45 = add <2 x i32> %ix2, + %46 = load %"class.Texture2D >", %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A" + %47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32 0, %"class.Texture2D >" %46) + %48 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32 14, %dx.types.Handle %47, %dx.types.ResourceProperties { i32 2, i32 521 }, %"class.Texture2D >" zeroinitializer) + %49 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %48, <2 x i32> %45) + %50 = load <2 x float>, <2 x float>* %49 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; 
CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %51 = add <4 x i32> %ix4, + %52 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %52) + %54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle %53, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %55 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %54, <4 x i32> %51) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture3D >"(i32 160, %"class.Texture3D >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %56 = add <3 x i32> %ix3, + %57 = load %"class.Texture3D >", %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A" + %58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32 0, %"class.Texture3D >" %57) + %59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32 14, %dx.types.Handle 
%58, %dx.types.ResourceProperties { i32 4, i32 521 }, %"class.Texture3D >" zeroinitializer) + %60 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %59, <3 x i32> %56) + %61 = load <2 x float>, <2 x float>* %60 + + ; CHECK: [[IX:%.*]] = add <4 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <4 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <4 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <4 x i32> [[IX]], i64 2 + ; CHECK-DAG: [[IX3:%.*]] = extractelement <4 x i32> [[IX]], i64 3 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 [[IX3]], i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %62 = add <4 x i32> %ix4, + %63 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %63) + %65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %64, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %66 = call <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32 231, %dx.types.Handle %65, <4 x i32> %62) + + ; CHECK: [[IX:%.*]] = add <3 x i32> {{%.*}}, + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.Texture2DArray >"(i32 160, %"class.Texture2DArray >" + ; CHECK: [[ANHDL:%.*]] = call 
%dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 7, i32 521 }) + ; CHECK-DAG: [[IX0:%.*]] = extractelement <3 x i32> [[IX]], i64 0 + ; CHECK-DAG: [[IX1:%.*]] = extractelement <3 x i32> [[IX]], i64 1 + ; CHECK-DAG: [[IX2:%.*]] = extractelement <3 x i32> [[IX]], i64 2 + ; CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[ANHDL]], i32 0, i32 [[IX0]], i32 [[IX1]], i32 [[IX2]], i32 undef, i32 undef, i32 undef) + %67 = add <3 x i32> %ix3, + %68 = load %"class.Texture2DArray >", %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A" + %69 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32 0, %"class.Texture2DArray >" %68) + %70 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32 14, %dx.types.Handle %69, %dx.types.ResourceProperties { i32 7, i32 521 }, %"class.Texture2DArray >" zeroinitializer) + %71 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %70, <3 x i32> %67) + %72 = load <2 x float>, <2 x float>* %71 + + %73 = icmp ne <2 x i32> %6, zeroinitializer + %74 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %73, <2 x float> %33, <2 x float> %39) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 13 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef, + %75 = add i32 %ix1, 13 + %76 = load %"class.RWBuffer >", %"class.RWBuffer >"* 
@"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %77 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %76) + %78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %77, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %79 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %78, i32 %75) + store <2 x float> %74, <2 x float>* %79 + + %80 = icmp ne <2 x i32> %14, zeroinitializer + %81 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %80, <2 x float> %44, <2 x float> %50) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 14 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %82 = add i32 %ix1, 14 + %83 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %84 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %83) + %85 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %84, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %86 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %85, i32 %82) + store <2 x float> %81, <2 x float>* %86 + + %87 = icmp ne <2 x i32> %20, 
zeroinitializer + %88 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %87, <2 x float> %55, <2 x float> %61) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 15 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %89 = add i32 %ix1, 15 + %90 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %91 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %90) + %92 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %91, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %93 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %92, i32 %89) + store <2 x float> %88, <2 x float>* %93 + + %94 = icmp ne <2 x i32> %28, zeroinitializer + %95 = call <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32 184, <2 x i1> %94, <2 x float> %66, <2 x float> %72) + + ; CHECK: [[IX:%.*]] = add i32 [[PIX]], 16 + ; CHECK: [[HDL:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[ANHDL:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDL]], %dx.types.ResourceProperties { i32 4106, i32 521 }) + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[ANHDL]], i32 [[IX]], i32 undef + %96 = add i32 %ix1, 16 + %97 = load %"class.RWBuffer >", 
%"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A" + %98 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %97) + %99 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %98, %dx.types.ResourceProperties { i32 4106, i32 521 }, %"class.RWBuffer >" zeroinitializer) + %100 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %99, i32 %96) + store <2 x float> %95, <2 x float>* %100 + + ret void +} + +declare <2 x i1> @"dx.hl.op.ro.<2 x i1> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x i1> @"dx.hl.op..<2 x i1> (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #0 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DMS, 0>\22)"(i32, %"class.Texture2DMS, 0>") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2DMS, 0>") #2 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture1D >\22)"(i32, %"class.Texture1D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture1D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2D >\22)"(i32, %"class.Texture2D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture2D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #2 +declare <2 x float> @"dx.hl.op.ro.<2 x float> (i32, %dx.types.Handle, <4 x i32>)"(i32, %dx.types.Handle, <4 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture3D >\22)"(i32, %"class.Texture3D >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.Texture3D >") #2 +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.Texture2DArray >\22)"(i32, %"class.Texture2DArray >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.Texture2DArray >\22)"(i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %"class.Texture2DArray >") #2 +declare <2 x float> @"dx.hl.op.rn.<2 x float> (i32, <2 x i1>, <2 x float>, <2 x float>)"(i32, <2 x i1>, <2 x float>, <2 x float>) #2 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #2 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!22} +!dx.fnprops = !{!35} +!dx.options = !{!36, !37} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4807 (longvec_bab_ldst, 88cfe61c3-dirty)"} +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16, !19} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{i32 0, !20, !21} +!20 = !{i32 4, !"IX4", i32 7, i32 5} +!21 = !{i32 4} +!22 = !{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, !"main", null, !23, null} +!23 = !{!24, !32, null, null} +!24 = !{!25, !27, !29, !30, !31} +!25 = !{i32 0, %"class.Texture2DMS, 0>"* @"\01?Tex2dMs@@3V?$Texture2DMS@V?$vector@_N$01@@$0A@@@A", !"Tex2dMs", i32 0, i32 2, i32 1, i32 3, i32 0, !26} +!26 = !{i32 0, i32 5} +!27 = !{i32 1, %"class.Texture1D >"* @"\01?Tex1d@@3V?$Texture1D@V?$vector@M$01@@@@A", !"Tex1d", i32 0, i32 3, i32 1, i32 1, i32 0, !28} +!28 = !{i32 0, i32 
9} +!29 = !{i32 2, %"class.Texture2D >"* @"\01?Tex2d@@3V?$Texture2D@V?$vector@M$01@@@@A", !"Tex2d", i32 0, i32 4, i32 1, i32 2, i32 0, !28} +!30 = !{i32 3, %"class.Texture3D >"* @"\01?Tex3d@@3V?$Texture3D@V?$vector@M$01@@@@A", !"Tex3d", i32 0, i32 5, i32 1, i32 4, i32 0, !28} +!31 = !{i32 4, %"class.Texture2DArray >"* @"\01?Tex2dArr@@3V?$Texture2DArray@V?$vector@M$01@@@@A", !"Tex2dArr", i32 0, i32 6, i32 1, i32 7, i32 0, !28} +!32 = !{!33, !34} +!33 = !{i32 0, %"class.RWBuffer >"* @"\01?TyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"TyBuf", i32 0, i32 1, i32 1, i32 10, i1 false, i1 false, i1 false, !26} +!34 = !{i32 1, %"class.RWBuffer >"* @"\01?OutBuf@@3V?$RWBuffer@V?$vector@M$01@@@@A", !"OutBuf", i32 0, i32 7, i32 1, i32 10, i1 false, i1 false, i1 false, !28} +!35 = !{void (i32, <2 x i32>, <3 x i32>, <4 x i32>)* @main, i32 1} +!36 = !{i32 64} +!37 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl new file mode 100644 index 0000000000..9ff6039127 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.hlsl @@ -0,0 +1,404 @@ +// RUN: %dxc -fcgl -T vs_6_6 %s | FileCheck %s + +// Source file for DxilGen IR test for typed buffer store lowering +// Focuses on converted types in addition to common float type. 
+ +RWBuffer FTyBuf; +RWBuffer BTyBuf; +RWBuffer LTyBuf; +RWBuffer DTyBuf; + +RWTexture1D FTex1d; +RWTexture1D BTex1d; +RWTexture1D LTex1d; +RWTexture1D DTex1d; + +RWTexture2D FTex2d; +RWTexture2D BTex2d; +RWTexture2D LTex2d; +RWTexture2D DTex2d; + +RWTexture3D FTex3d; +RWTexture3D BTex3d; +RWTexture3D LTex3d; +RWTexture3D DTex3d; + +RWTexture2DMS FTex2dMs; +RWTexture2DMS BTex2dMs; +RWTexture2DMS LTex2dMs; +RWTexture2DMS DTex2dMs; + +// CHECK: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +void main(uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { + + // CHECK-DAG: [[ix3adr:%.*]] = alloca <3 x i32>, align 4 + // CHECK-DAG: [[ix2adr:%.*]] = alloca <2 x i32>, align 4 + // CHECK-DAG: [[ix1adr:%.*]] = alloca i32, align 4 + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, 
i32 777 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTyBuf[ix1 + 1] = FTyBuf[ix1 + 0]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTyBuf[ix1 + 3] = BTyBuf[ix1 + 2]; + + // CHECK: [[ix1:%.*]] = load i32, i32* 
[[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTyBuf[ix1 + 5] = LTyBuf[ix1 + 4]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, 
%"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTyBuf[ix1 + 7] = DTyBuf[ix1 + 6]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, 
%"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex1d[ix1 + 9] = FTex1d[ix1 + 8]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* 
@"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex1d[ix1 + 11] = BTex1d[ix1 + 10]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex1d[ix1 + 13] = LTex1d[ix1 + 12]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle [[anhdl]], i32 [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex1d[ix1 + 15] = DTex1d[ix1 + 14]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* 
@"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2d[ix2 + 17] = FTex2d[ix2 + 16]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> 
[[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2d[ix2 + 19] = BTex2d[ix2 + 18]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2d[ix2 + 21] = LTex2d[ix2 + 20]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2d[ix2 + 23] = DTex2d[ix2 + 22]; + + // 
CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex3d[ix3 + 25] = FTex3d[ix3 + 24]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex3d[ix3 + 27] = BTex3d[ix3 + 26]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* 
@"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex3d[ix3 + 29] = LTex3d[ix3 + 28]; + + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix3:%.*]] = load <3 x i32>, <3 x i32>* [[ix3adr]], align 4 + // CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <3 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex3d[ix3 + 31] = DTex3d[ix3 + 30]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // 
CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs[ix2 + 33] = FTex2dMs[ix2 + 32]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x 
i32>* [[sub]] + BTex2dMs[ix2 + 35] = BTex2dMs[ix2 + 34]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs[ix2 + 37] = LTex2dMs[ix2 + 36]; + + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // 
CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs[ix2 + 39] = DTex2dMs[ix2 + 38]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 0 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> 
[[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <3 x float>, <3 x float>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 1 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <3 x float> [[ld]], <3 x float>* [[sub]] + FTex2dMs.sample[ix1 + 1][ix2 + 41] = FTex2dMs.sample[ix1 + 0][ix2 + 40]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = 
call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i32>, <2 x i32>* [[sub]] + // CHECK: [[bld:%.*]] = icmp ne <2 x i32> [[ld]], zeroinitializer + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = zext <2 x i1> [[bld]] to <2 x i32> + // CHECK: store <2 x i32> [[ld]], <2 x i32>* [[sub]] + BTex2dMs.sample[ix1 + 3][ix2 + 43] = BTex2dMs.sample[ix1 + 2][ix2 + 42]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // 
CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load <2 x i64>, <2 x i64>* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store <2 x i64> [[ld]], <2 x i64>* [[sub]] + LTex2dMs.sample[ix1 + 5][ix2 + 45] = LTex2dMs.sample[ix1 + 4][ix2 + 44]; + + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = 
call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: [[ld:%.*]] = load double, double* [[sub]] + // CHECK: [[ix1:%.*]] = load i32, i32* [[ix1adr]], align 4 + // CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + // CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" + // CHECK: [[anhdl:%.*]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" undef) + // CHECK: [[ix2:%.*]] = load <2 x i32>, <2 x i32>* [[ix2adr]], align 4 + // CHECK: [[ix:%.*]] = add <2 x i32> [[ix2:%.*]], + // CHECK: [[sub:%.*]] = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle [[anhdl]], <2 x i32> [[ix]], i32 [[sax]]) + // CHECK: store double [[ld]], double* [[sub]] + DTex2dMs.sample[ix1 + 7][ix2 + 47] = DTex2dMs.sample[ix1 + 6][ix2 + 46]; + + // CHECK: ret void + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll new file mode 100644 index 0000000000..ac5c6182e1 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-typed-store.ll @@ -0,0 +1,1079 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWBuffer >" = type { <3 x float> } +%"class.RWBuffer >" = type { <2 x i32> } +%"class.RWBuffer >" = type { <2 x i64> } +%"class.RWBuffer" = type { double } +%"class.RWTexture1D >" = type { <3 x float> } +%"class.RWTexture1D >" = type { <2 x i32> } +%"class.RWTexture1D 
>" = type { <2 x i64> } +%"class.RWTexture1D" = type { double } +%"class.RWTexture2D >" = type { <3 x float> } +%"class.RWTexture2D >" = type { <2 x i32> } +%"class.RWTexture2D >" = type { <2 x i64> } +%"class.RWTexture2D" = type { double } +%"class.RWTexture3D >" = type { <3 x float> } +%"class.RWTexture3D >" = type { <2 x i32> } +%"class.RWTexture3D >" = type { <2 x i64> } +%"class.RWTexture3D" = type { double } +%"class.RWTexture2DMS, 0>" = type { <3 x float>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i32>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS, 0>" = type { <2 x i64>, %"class.RWTexture2DMS, 0>::sample_type" } +%"class.RWTexture2DMS, 0>::sample_type" = type { i32 } +%"class.RWTexture2DMS" = type { double, %"class.RWTexture2DMS::sample_type" } +%"class.RWTexture2DMS::sample_type" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" = external global %"class.RWBuffer >", align 4 +@"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" = external global %"class.RWBuffer >", align 8 +@"\01?DTyBuf@@3V?$RWBuffer@N@@A" = external global %"class.RWBuffer", align 8 +@"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture1D >", align 4 +@"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture1D >", align 8 +@"\01?DTex1d@@3V?$RWTexture1D@N@@A" = external global %"class.RWTexture1D", align 8 +@"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" = external global %"class.RWTexture2D >", align 4 +@"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" = 
external global %"class.RWTexture2D >", align 4 +@"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture2D >", align 8 +@"\01?DTex2d@@3V?$RWTexture2D@N@@A" = external global %"class.RWTexture2D", align 8 +@"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" = external global %"class.RWTexture3D >", align 4 +@"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" = external global %"class.RWTexture3D >", align 8 +@"\01?DTex3d@@3V?$RWTexture3D@N@@A" = external global %"class.RWTexture3D", align 8 +@"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 4 +@"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" = external global %"class.RWTexture2DMS, 0>", align 8 +@"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" = external global %"class.RWTexture2DMS", align 8 + +; Function Attrs: nounwind +; CHECK-LABEL: define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) +define void @main(i32 %ix1, <2 x i32> %ix2, <3 x i32> %ix3) #0 { +bb: + ; CHECK: [[ix3_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> undef, i32 [[ix3_0]], i64 0 + ; CHECK: [[ix3_1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1, i32 undef) + ; CHECK: [[vec3:%.*]] = insertelement <3 x i32> [[ix3]], i32 [[ix3_1]], i64 1 + ; CHECK: [[ix3_2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 2, i32 undef) + ; CHECK: [[ix3:%.*]] = insertelement <3 x i32> [[vec3]], i32 [[ix3_2]], i64 2 + ; CHECK: [[ix2_0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef) + ; CHECK: [[vec2:%.*]] = insertelement <2 x i32> undef, i32 [[ix2_0]], i64 0 + ; CHECK: [[ix2_1:%.*]] = call i32 
@dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef) + ; CHECK: [[ix2:%.*]] = insertelement <2 x i32> [[vec2]], i32 [[ix2_1]], i64 1 + ; CHECK: [[ix1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp1 = call 
%dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp) + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp3 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 %ix1) + %tmp4 = load <3 x float>, <3 x float>* %tmp3 + %tmp5 = add i32 %ix1, 1 + %tmp6 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A" + %tmp7 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp6) + %tmp8 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4106, i32 777 }, %"class.RWBuffer >" zeroinitializer) + %tmp9 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp8, i32 %tmp5) + store <3 x float> %tmp4, <3 x float>* %tmp9 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] 
= insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp10 = add i32 %ix1, 2 + %tmp11 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp11) + %tmp13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp12, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp14 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp13, i32 %tmp10) + %tmp15 = load <2 x i32>, <2 x i32>* %tmp14 + %tmp16 = icmp ne <2 x i32> %tmp15, zeroinitializer + %tmp17 = add i32 %ix1, 3 + %tmp18 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A" + %tmp19 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp18) + %tmp20 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp19, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp21 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp20, i32 %tmp17) + %tmp22 = zext <2 x i1> %tmp16 to <2 x i32> + store <2 x i32> %tmp22, <2 x i32>* %tmp21 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer >"(i32 160, %"class.RWBuffer >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp23 = add i32 %ix1, 4 + %tmp24 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp25 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp24) + %tmp26 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp25, %dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp27 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp26, i32 %tmp23) + %tmp28 = load <2 x i64>, <2 x i64>* %tmp27 + %tmp29 = add i32 %ix1, 5 + %tmp30 = load %"class.RWBuffer >", %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A" + %tmp31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32 0, %"class.RWBuffer >" %tmp30) + %tmp32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32 14, %dx.types.Handle %tmp31, 
%dx.types.ResourceProperties { i32 4106, i32 517 }, %"class.RWBuffer >" zeroinitializer) + %tmp33 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp32, i32 %tmp29) + store <2 x i64> %tmp28, <2 x i64>* %tmp33 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWBuffer"(i32 160, %"class.RWBuffer" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4106, i32 261 }) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp34 = add i32 %ix1, 6 + %tmp35 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp36 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp35) + %tmp37 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp36, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp38 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp37, i32 %tmp34) + %tmp39 = load double, double* %tmp38 + %tmp40 = add i32 %ix1, 7 + %tmp41 = load %"class.RWBuffer", %"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A" + %tmp42 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32 0, %"class.RWBuffer" %tmp41) + %tmp43 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32 14, %dx.types.Handle %tmp42, %dx.types.ResourceProperties { i32 4106, i32 261 }, %"class.RWBuffer" zeroinitializer) + %tmp44 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp43, i32 %tmp40) + store double %tmp39, double* %tmp44 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 8 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: 
[[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 9 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 777 }) + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp45 = add i32 %ix1, 8 + %tmp46 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp47 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp46) + %tmp48 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp47, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp49 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp48, i32 %tmp45) + %tmp50 = load <3 x float>, <3 x float>* %tmp49 + %tmp51 = add i32 %ix1, 9 + %tmp52 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A" + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp52) + %tmp54 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4097, i32 777 }, %"class.RWTexture1D >" zeroinitializer) + %tmp55 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 %tmp51) + store <3 x float> %tmp50, <3 x float>* %tmp55 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 10 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 11 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[val0]], i32 
[[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp56 = add i32 %ix1, 10 + %tmp57 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp58 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp57) + %tmp59 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp58, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp60 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp59, i32 %tmp56) + %tmp61 = load <2 x i32>, <2 x i32>* %tmp60 + %tmp62 = icmp ne <2 x i32> %tmp61, zeroinitializer + %tmp63 = add i32 %ix1, 11 + %tmp64 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A" + %tmp65 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp64) + %tmp66 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp65, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp67 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp66, i32 %tmp63) + %tmp68 = zext <2 x i1> %tmp62 to <2 x i32> + store <2 x i32> %tmp68, <2 x i32>* %tmp67 + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 12 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[ld:%.*]] 
= call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 13 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D >"(i32 160, %"class.RWTexture1D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 517 }) + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[loval0]], i32 [[hival0]], i32 
[[loval1]], i32 [[hival1]], i8 15) + %tmp69 = add i32 %ix1, 12 + %tmp70 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp70) + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp73 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 %tmp69) + %tmp74 = load <2 x i64>, <2 x i64>* %tmp73 + %tmp75 = add i32 %ix1, 13 + %tmp76 = load %"class.RWTexture1D >", %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A" + %tmp77 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32 0, %"class.RWTexture1D >" %tmp76) + %tmp78 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32 14, %dx.types.Handle %tmp77, %dx.types.ResourceProperties { i32 4097, i32 517 }, %"class.RWTexture1D >" zeroinitializer) + %tmp79 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp78, i32 %tmp75) + store <2 x i64> %tmp74, <2 x i64>* %tmp79 + + + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 14 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }) + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix]], 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 15 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture1D"(i32 160, %"class.RWTexture1D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4097, i32 261 }) + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix]], i32 undef, i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp80 = add i32 %ix1, 14 + %tmp81 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp82 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, %"class.RWTexture1D" %tmp81) + %tmp83 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp82, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp84 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp83, i32 %tmp80) + %tmp85 = load double, double* %tmp84 + %tmp86 = add i32 %ix1, 15 + %tmp87 = load %"class.RWTexture1D", %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A" + %tmp88 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32 0, 
%"class.RWTexture1D" %tmp87) + %tmp89 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32 14, %dx.types.Handle %tmp88, %dx.types.ResourceProperties { i32 4097, i32 261 }, %"class.RWTexture1D" zeroinitializer) + %tmp90 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp89, i32 %tmp86) + store double %tmp85, double* %tmp90 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; 
CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp91 = add <2 x i32> %ix2, + %tmp92 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp93 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp92) + %tmp94 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp93, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp95 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp94, <2 x i32> %tmp91) + %tmp96 = load <3 x float>, <3 x float>* %tmp95 + %tmp97 = add <2 x i32> %ix2, + %tmp98 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A" + %tmp99 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp98) + %tmp100 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp99, %dx.types.ResourceProperties { i32 4098, i32 777 }, %"class.RWTexture2D >" zeroinitializer) + %tmp101 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, 
%dx.types.Handle %tmp100, <2 x i32> %tmp97) + store <3 x float> %tmp96, <3 x float>* %tmp101 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 
[[val3]], i32 [[val3]], i8 15) + %tmp102 = add <2 x i32> %ix2, + %tmp103 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp104 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp103) + %tmp105 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp104, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp106 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp105, <2 x i32> %tmp102) + %tmp107 = load <2 x i32>, <2 x i32>* %tmp106 + %tmp108 = icmp ne <2 x i32> %tmp107, zeroinitializer + %tmp109 = add <2 x i32> %ix2, + %tmp110 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A" + %tmp111 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp110) + %tmp112 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp111, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp113 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp112, <2 x i32> %tmp109) + %tmp114 = zext <2 x i1> %tmp108 to <2 x i32> + store <2 x i32> %tmp114, <2 x i32>* %tmp113 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties 
{ i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D >"(i32 160, %"class.RWTexture2D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 
[[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp115 = add <2 x i32> %ix2, + %tmp116 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp117 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp116) + %tmp118 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp117, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp119 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp118, <2 x i32> %tmp115) + %tmp120 = load <2 x i64>, <2 x i64>* %tmp119 + %tmp121 = add <2 x i32> %ix2, + %tmp122 = load %"class.RWTexture2D >", %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A" + %tmp123 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32 0, %"class.RWTexture2D >" %tmp122) + %tmp124 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32 14, %dx.types.Handle %tmp123, %dx.types.ResourceProperties { i32 4098, i32 517 }, %"class.RWTexture2D >" zeroinitializer) + %tmp125 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp124, <2 x i32> %tmp121) + store <2 x i64> %tmp120, <2 x i64>* %tmp125 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call 
%dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2D"(i32 160, %"class.RWTexture2D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4098, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp126 = add <2 x i32> %ix2, + %tmp127 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp128 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp127) + %tmp129 = 
call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp128, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp130 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp129, <2 x i32> %tmp126) + %tmp131 = load double, double* %tmp130 + %tmp132 = add <2 x i32> %ix2, + %tmp133 = load %"class.RWTexture2D", %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A" + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32 0, %"class.RWTexture2D" %tmp133) + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32 14, %dx.types.Handle %tmp134, %dx.types.ResourceProperties { i32 4098, i32 261 }, %"class.RWTexture2D" zeroinitializer) + %tmp136 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp135, <2 x i32> %tmp132) + store double %tmp131, double* %tmp136 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; 
CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 777 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15) + %tmp137 = add <3 x i32> %ix3, + %tmp138 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp139 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp138) + %tmp140 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp139, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp141 = call <3 x float>* 
@"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp140, <3 x i32> %tmp137) + %tmp142 = load <3 x float>, <3 x float>* %tmp141 + %tmp143 = add <3 x i32> %ix3, + %tmp144 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A" + %tmp145 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp144) + %tmp146 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp145, %dx.types.ResourceProperties { i32 4100, i32 777 }, %"class.RWTexture3D >" zeroinitializer) + %tmp147 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp146, <3 x i32> %tmp143) + store <3 x float> %tmp142, <3 x float>* %tmp147 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: 
[[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> [[vec]], i64 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15) + %tmp148 = add <3 x i32> %ix3, + %tmp149 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp150 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp149) + %tmp151 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp150, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp152 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp151, <3 x i32> %tmp148) + %tmp153 = load <2 x i32>, <2 x i32>* %tmp152 + %tmp154 = icmp ne <2 x i32> %tmp153, zeroinitializer + %tmp155 = add <3 x i32> %ix3, + %tmp156 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* 
@"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A" + %tmp157 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp156) + %tmp158 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp157, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp159 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp158, <3 x i32> %tmp155) + %tmp160 = zext <2 x i1> %tmp154 to <2 x i32> + store <2 x i32> %tmp160, <2 x i32>* %tmp159 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; 
CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D >"(i32 160, %"class.RWTexture3D >" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 517 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15) + %tmp161 = add <3 x i32> %ix3, + %tmp162 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp163 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp162) + %tmp164 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 
14, %dx.types.Handle %tmp163, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp165 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp164, <3 x i32> %tmp161) + %tmp166 = load <2 x i64>, <2 x i64>* %tmp165 + %tmp167 = add <3 x i32> %ix3, + %tmp168 = load %"class.RWTexture3D >", %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A" + %tmp169 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32 0, %"class.RWTexture3D >" %tmp168) + %tmp170 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32 14, %dx.types.Handle %tmp169, %dx.types.ResourceProperties { i32 4100, i32 517 }, %"class.RWTexture3D >" zeroinitializer) + %tmp171 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp170, <3 x i32> %tmp167) + store <2 x i64> %tmp166, <2 x i64>* %tmp171 + + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 undef, i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call 
double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <3 x i32> [[ix3]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture3D"(i32 160, %"class.RWTexture3D" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4100, i32 261 }) + ; CHECK: [[ix3_0:%.*]] = extractelement <3 x i32> [[ix]], i64 0 + ; CHECK: [[ix3_1:%.*]] = extractelement <3 x i32> [[ix]], i64 1 + ; CHECK: [[ix3_2:%.*]] = extractelement <3 x i32> [[ix]], i64 2 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStore.i32(i32 67, %dx.types.Handle [[anhdl]], i32 [[ix3_0]], i32 [[ix3_1]], i32 [[ix3_2]], i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15) + %tmp172 = add <3 x i32> %ix3, + %tmp173 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp174 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp173) + %tmp175 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp174, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp176 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp175, <3 x i32> %tmp172) + %tmp177 = load double, double* %tmp176 + %tmp178 = add <3 x i32> %ix3, + %tmp179 = load %"class.RWTexture3D", %"class.RWTexture3D"* @"\01?DTex3d@@3V?$RWTexture3D@N@@A" + %tmp180 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%\22class.RWTexture3D\22)"(i32 0, %"class.RWTexture3D" %tmp179) + %tmp181 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32 14, %dx.types.Handle %tmp180, %dx.types.ResourceProperties { i32 4100, i32 261 }, %"class.RWTexture3D" zeroinitializer) + %tmp182 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32 0, %dx.types.Handle %tmp181, <3 x i32> %tmp178) + store double %tmp177, double* %tmp182 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; 
CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 0) + %tmp183 = add <2 x i32> %ix2, + %tmp184 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp185 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp184) + %tmp186 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp185, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp187 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp186, <2 x i32> %tmp183) + %tmp188 = load <3 x float>, <3 x float>* %tmp187 + %tmp189 = add <2 x i32> %ix2, + %tmp190 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp191 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp190) + %tmp192 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp191, %dx.types.ResourceProperties { i32 4099, i32 777 }, 
%"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp193 = call <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp192, <2 x i32> %tmp189) + store <3 x float> %tmp188, <3 x float>* %tmp193 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: [[bvec:%.*]] = icmp ne <2 x i32> [[pong]], zeroinitializer + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[vec:%.*]] = zext <2 x i1> [[bvec]] to <2 x i32> + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i32> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i32> 
[[vec]], i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[val0]], i32 [[val1]], i32 [[val3]], i32 [[val3]], i8 15, i32 0) + %tmp194 = add <2 x i32> %ix2, + %tmp195 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp196 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp195) + %tmp197 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp196, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp198 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp197, <2 x i32> %tmp194) + %tmp199 = load <2 x i32>, <2 x i32>* %tmp198 + %tmp200 = icmp ne <2 x i32> %tmp199, zeroinitializer + %tmp201 = add <2 x i32> %ix2, + %tmp202 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp203 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp202) + %tmp204 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp203, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp205 = call <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp204, <2 x i32> %tmp201) + %tmp206 = zext <2 x i1> %tmp200 to <2 x i32> + store <2 x i32> %tmp206, <2 x i32>* %tmp205 + + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: 
[[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: 
[[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 [[hival1]], i8 15, i32 0) + + %tmp207 = add <2 x i32> %ix2, + %tmp208 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp209 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp208) + %tmp210 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp209, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp211 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp210, <2 x i32> %tmp207) + %tmp212 = load <2 x i64>, <2 x i64>* %tmp211 + %tmp213 = add <2 x i32> %ix2, + %tmp214 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp215 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp214) + %tmp216 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp215, %dx.types.ResourceProperties { 
i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp217 = call <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp216, <2 x i32> %tmp213) + store <2 x i64> %tmp212, <2 x i64>* %tmp217 + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 0, i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[dval:%.*]] = call double @dx.op.makeDouble.f64(i32 101, i32 [[val0]], i32 [[val1]]) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[dvec:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[dval]]) + ; CHECK: [[lodbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 0 + ; CHECK: [[hidbl:%.*]] = extractvalue %dx.types.splitdouble [[dvec]], 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, 
i32 [[lodbl]], i32 [[hidbl]], i32 [[lodbl]], i32 [[hidbl]], i8 15, i32 0) + + %tmp218 = add <2 x i32> %ix2, + %tmp219 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp220 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp219) + %tmp221 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp220, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp222 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp221, <2 x i32> %tmp218) + %tmp223 = load double, double* %tmp222 + %tmp224 = add <2 x i32> %ix2, + %tmp225 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp226 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp225) + %tmp227 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp226, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp228 = call double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32 0, %dx.types.Handle %tmp227, <2 x i32> %tmp224) + store double %tmp223, double* %tmp228 + + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 
+ ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 66, %dx.types.Handle [[anhdl]], i32 [[ix1]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[ping:%.*]] = insertelement <3 x float> undef, float [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <3 x float> [[ping]], float [[val1]], i64 1 + ; CHECK: [[vec:%.*]] = insertelement <3 x float> [[pong]], float [[val2]], i64 2 + ; CHECK: [[ix:%.*]] = add i32 [[ix1]], 1 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 777 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <3 x float> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <3 x float> [[vec]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <3 x float> [[vec]], i64 2 + ; CHECK: call void @dx.op.textureStoreSample.f32(i32 225, %dx.types.Handle %388, i32 %389, i32 %390, i32 undef, float %392, float %393, float %394, float %391, i8 15, i32 %tmp235) + %tmp229 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp230 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp229) + %tmp231 = call 
%dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp230, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp232 = add <2 x i32> %ix2, + %tmp233 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp231, <2 x i32> %tmp232, i32 %ix1) + %tmp234 = load <3 x float>, <3 x float>* %tmp233 + %tmp235 = add i32 %ix1, 1 + %tmp236 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A" + %tmp237 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp236) + %tmp238 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp237, %dx.types.ResourceProperties { i32 4099, i32 777 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp239 = add <2 x i32> %ix2, + %tmp240 = call <3 x float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp238, <2 x i32> %tmp239, i32 %tmp235) + store <3 x float> %tmp234, <3 x float>* %tmp240 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 2 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, 
%dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[ping:%.*]] = insertelement <2 x i32> undef, i32 [[val0]], i64 0 + ; CHECK: [[pong:%.*]] = insertelement <2 x i32> [[ping]], i32 [[val1]], i64 1 + ; CHECK: %tmp248 = icmp ne <2 x i32> %402, zeroinitializer + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 3 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: %407 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %408 = extractelement <2 x i32> %tmp255, i64 0 + ; CHECK: %409 = extractelement <2 x i32> %tmp255, i64 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %404, i32 %405, i32 %406, i32 undef, i32 %408, i32 %409, i32 %407, i32 %407, i8 15, i32 %tmp249) + ; CHECK: %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + %tmp241 = add i32 %ix1, 2 + %tmp242 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp243 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp242) + %tmp244 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp243, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" 
zeroinitializer) + %tmp245 = add <2 x i32> %ix2, + %tmp246 = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp244, <2 x i32> %tmp245, i32 %tmp241) + %tmp247 = load <2 x i32>, <2 x i32>* %tmp246 + %tmp248 = icmp ne <2 x i32> %tmp247, zeroinitializer + %tmp249 = add i32 %ix1, 3 + %tmp250 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A" + %tmp251 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp250) + %tmp252 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp251, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp253 = add <2 x i32> %ix2, + %tmp254 = call <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp252, <2 x i32> %tmp253, i32 %tmp249) + %tmp255 = zext <2 x i1> %tmp248 to <2 x i32> + store <2 x i32> %tmp255, <2 x i32>* %tmp254 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 4 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 
[[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 3 + ; CHECK: [[loval:%.*]] = zext i32 [[val0]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val1]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val0:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[loval:%.*]] = zext i32 [[val2]] to i64 + ; CHECK: [[hival:%.*]] = zext i32 [[val3]] to i64 + ; CHECK: [[val:%.*]] = shl i64 [[hival]], 32 + ; CHECK: [[val1:%.*]] = or i64 [[loval]], [[val]] + ; CHECK: [[ping:%.*]] = insertelement <2 x i64> undef, i64 [[val0]], i64 0 + ; CHECK: [[vec:%.*]] = insertelement <2 x i64> [[ping]], i64 [[val1]], i64 1 + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 5 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS, 0>"(i32 160, %"class.RWTexture2DMS, 0>" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 517 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[val3:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val0:%.*]] = extractelement <2 x i64> [[vec]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <2 x i64> [[vec]], i64 1 + ; CHECK: [[loval0:%.*]] = trunc i64 [[val0]] to i32 + ; CHECK: [[msk0:%.*]] = lshr i64 [[val0]], 32 + ; CHECK: [[hival0:%.*]] = trunc i64 [[msk0]] to i32 + ; CHECK: [[loval1:%.*]] = trunc i64 [[val1]] to i32 + ; CHECK: [[msk1:%.*]] = lshr i64 [[val1]], 32 + ; CHECK: [[hival1:%.*]] = trunc i64 [[msk1]] to i32 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle [[anhdl]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 [[loval0]], i32 [[hival0]], i32 [[loval1]], i32 
[[hival1]], i8 15, i32 [[sax]]) + %tmp256 = add i32 %ix1, 4 + %tmp257 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp258 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp257) + %tmp259 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp258, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp260 = add <2 x i32> %ix2, + %tmp261 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp259, <2 x i32> %tmp260, i32 %tmp256) + %tmp262 = load <2 x i64>, <2 x i64>* %tmp261 + %tmp263 = add i32 %ix1, 5 + %tmp264 = load %"class.RWTexture2DMS, 0>", %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A" + %tmp265 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32 0, %"class.RWTexture2DMS, 0>" %tmp264) + %tmp266 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32 14, %dx.types.Handle %tmp265, %dx.types.ResourceProperties { i32 4099, i32 517 }, %"class.RWTexture2DMS, 0>" zeroinitializer) + %tmp267 = add <2 x i32> %ix2, + %tmp268 = call <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp266, <2 x i32> %tmp267, i32 %tmp263) + store <2 x i64> %tmp262, <2 x i64>* %tmp268 + + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 6 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, 
%dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.i32 @dx.op.textureLoad.i32(i32 66, %dx.types.Handle [[anhdl]], i32 [[sax]], i32 [[ix2_0]], i32 [[ix2_1]], i32 undef, i32 undef, i32 undef, i32 undef) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.i32 [[ld]], 1 + ; CHECK: %447 = call double @dx.op.makeDouble.f64(i32 101, i32 %445, i32 %446) + ; CHECK: [[sax:%.*]] = add i32 [[ix1]], 7 + ; CHECK: [[hdl:%.*]] = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWTexture2DMS"(i32 160, %"class.RWTexture2DMS" + ; CHECK: [[anhdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[hdl]], %dx.types.ResourceProperties { i32 4099, i32 261 }) + ; CHECK: [[ix:%.*]] = add <2 x i32> [[ix2]], + ; CHECK: [[ix2_0:%.*]] = extractelement <2 x i32> [[ix]], i64 0 + ; CHECK: [[ix2_1:%.*]] = extractelement <2 x i32> [[ix]], i64 1 + ; CHECK: %452 = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %447) + ; CHECK: %453 = extractvalue %dx.types.splitdouble %452, 0 + ; CHECK: %454 = extractvalue %dx.types.splitdouble %452, 1 + ; CHECK: call void @dx.op.textureStoreSample.i32(i32 225, %dx.types.Handle %449, i32 %450, i32 %451, i32 undef, i32 %453, i32 %454, i32 %453, i32 %454, i8 15, i32 %tmp276) + %tmp269 = add i32 %ix1, 6 + %tmp270 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp271 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp270) + %tmp272 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp271, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp273 = add <2 x i32> %ix2, + %tmp274 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp272, <2 x i32> %tmp273, i32 %tmp269) + %tmp275 = load double, double* %tmp274 + %tmp276 = add i32 %ix1, 7 + %tmp277 = load %"class.RWTexture2DMS", %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A" + %tmp278 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32 0, %"class.RWTexture2DMS" %tmp277) + %tmp279 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32 14, %dx.types.Handle %tmp278, %dx.types.ResourceProperties { i32 4099, i32 261 }, %"class.RWTexture2DMS" zeroinitializer) + %tmp280 = add <2 x i32> %ix2, + %tmp281 = call double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32 5, %dx.types.Handle %tmp279, <2 x i32> %tmp280, i32 %tmp276) + store double %tmp275, double* %tmp281 + + + ; CHECK: ret void + ret void +} + + +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer >\22)"(i32, %"class.RWBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWBuffer\22)"(i32, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWBuffer\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWBuffer") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D >\22)"(i32, %"class.RWTexture1D >") #1 +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D >") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture1D\22)"(i32, %"class.RWTexture1D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture1D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture1D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D >\22)"(i32, %"class.RWTexture2D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D >\22)"(i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %"class.RWTexture2D >") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <2 x i32>)"(i32, %dx.types.Handle, <2 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2D\22)"(i32, %"class.RWTexture2D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2D") #1 +declare <3 x float>* @"dx.hl.subscript.[].rn.<3 x float>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i32>* @"dx.hl.subscript.[].rn.<2 x i32>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D >") #1 +declare <2 x i64>* @"dx.hl.subscript.[].rn.<2 x i64>* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D >\22)"(i32, %"class.RWTexture3D >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D 
>") #1 +declare double* @"dx.hl.subscript.[].rn.double* (i32, %dx.types.Handle, <3 x i32>)"(i32, %dx.types.Handle, <3 x i32>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture3D\22)"(i32, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture3D\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture3D") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS, 0>\22)"(i32, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS, 0>\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS, 0>") #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWTexture2DMS\22)"(i32, %"class.RWTexture2DMS") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWTexture2DMS\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWTexture2DMS") #1 +declare <3 x 
float>* @"dx.hl.subscript.[][].rn.<3 x float>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i32>* @"dx.hl.subscript.[][].rn.<2 x i32>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare <2 x i64>* @"dx.hl.subscript.[][].rn.<2 x i64>* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 +declare double* @"dx.hl.subscript.[][].rn.double* (i32, %dx.types.Handle, <2 x i32>, i32)"(i32, %dx.types.Handle, <2 x i32>, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!19} +!dx.fnprops = !{!44} +!dx.options = !{!45, !46} + +!3 = !{i32 1, i32 6} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 6} +!6 = !{i32 1, void (i32, <2 x i32>, <3 x i32>)* @main, !7} +!7 = !{!8, !10, !13, !16} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, !11, !12} +!11 = !{i32 4, !"IX1", i32 7, i32 5} +!12 = !{i32 1} +!13 = !{i32 0, !14, !15} +!14 = !{i32 4, !"IX2", i32 7, i32 5} +!15 = !{i32 2} +!16 = !{i32 0, !17, !18} +!17 = !{i32 4, !"IX3", i32 7, i32 5} +!18 = !{i32 3} +!19 = !{void (i32, <2 x i32>, <3 x i32>)* @main, !"main", null, !20, null} +!20 = !{null, !21, null, null} +!21 = !{!22, !24, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43} +!22 = !{i32 0, %"class.RWBuffer >"* @"\01?FTyBuf@@3V?$RWBuffer@V?$vector@M$02@@@@A", !"FTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !23} +!23 = !{i32 0, i32 9} +!24 = !{i32 1, %"class.RWBuffer >"* @"\01?BTyBuf@@3V?$RWBuffer@V?$vector@_N$01@@@@A", !"BTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!25 = !{i32 0, i32 5} +!26 = !{i32 2, %"class.RWBuffer >"* @"\01?LTyBuf@@3V?$RWBuffer@V?$vector@_K$01@@@@A", !"LTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!27 = !{i32 3, 
%"class.RWBuffer"* @"\01?DTyBuf@@3V?$RWBuffer@N@@A", !"DTyBuf", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !25} +!28 = !{i32 4, %"class.RWTexture1D >"* @"\01?FTex1d@@3V?$RWTexture1D@V?$vector@M$02@@@@A", !"FTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !23} +!29 = !{i32 5, %"class.RWTexture1D >"* @"\01?BTex1d@@3V?$RWTexture1D@V?$vector@_N$01@@@@A", !"BTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!30 = !{i32 6, %"class.RWTexture1D >"* @"\01?LTex1d@@3V?$RWTexture1D@V?$vector@_K$01@@@@A", !"LTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!31 = !{i32 7, %"class.RWTexture1D"* @"\01?DTex1d@@3V?$RWTexture1D@N@@A", !"DTex1d", i32 -1, i32 -1, i32 1, i32 1, i1 false, i1 false, i1 false, !25} +!32 = !{i32 8, %"class.RWTexture2D >"* @"\01?FTex2d@@3V?$RWTexture2D@V?$vector@M$02@@@@A", !"FTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !23} +!33 = !{i32 9, %"class.RWTexture2D >"* @"\01?BTex2d@@3V?$RWTexture2D@V?$vector@_N$01@@@@A", !"BTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!34 = !{i32 10, %"class.RWTexture2D >"* @"\01?LTex2d@@3V?$RWTexture2D@V?$vector@_K$01@@@@A", !"LTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!35 = !{i32 11, %"class.RWTexture2D"* @"\01?DTex2d@@3V?$RWTexture2D@N@@A", !"DTex2d", i32 -1, i32 -1, i32 1, i32 2, i1 false, i1 false, i1 false, !25} +!36 = !{i32 12, %"class.RWTexture3D >"* @"\01?FTex3d@@3V?$RWTexture3D@V?$vector@M$02@@@@A", !"FTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !23} +!37 = !{i32 13, %"class.RWTexture3D >"* @"\01?BTex3d@@3V?$RWTexture3D@V?$vector@_N$01@@@@A", !"BTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25} +!38 = !{i32 14, %"class.RWTexture3D >"* @"\01?LTex3d@@3V?$RWTexture3D@V?$vector@_K$01@@@@A", !"LTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25} +!39 = !{i32 15, %"class.RWTexture3D"* 
@"\01?DTex3d@@3V?$RWTexture3D@N@@A", !"DTex3d", i32 -1, i32 -1, i32 1, i32 4, i1 false, i1 false, i1 false, !25} +!40 = !{i32 16, %"class.RWTexture2DMS, 0>"* @"\01?FTex2dMs@@3V?$RWTexture2DMS@V?$vector@M$02@@$0A@@@A", !"FTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !23} +!41 = !{i32 17, %"class.RWTexture2DMS, 0>"* @"\01?BTex2dMs@@3V?$RWTexture2DMS@V?$vector@_N$01@@$0A@@@A", !"BTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25} +!42 = !{i32 18, %"class.RWTexture2DMS, 0>"* @"\01?LTex2dMs@@3V?$RWTexture2DMS@V?$vector@_K$01@@$0A@@@A", !"LTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25} +!43 = !{i32 19, %"class.RWTexture2DMS"* @"\01?DTex2dMs@@3V?$RWTexture2DMS@N$0A@@@A", !"DTex2dMs", i32 -1, i32 -1, i32 1, i32 3, i1 false, i1 false, i1 false, !25} +!44 = !{void (i32, <2 x i32>, <3 x i32>)* @main, i32 1} +!45 = !{i32 64} +!46 = !{i32 -1} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl new file mode 100644 index 0000000000..08836dfbaf --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder.hlsl @@ -0,0 +1,37 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: |-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (dx::HitObject, unsigned int, unsigned int)' extern +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> HitObject 'dx::HitObject':'dx::HitObject' +// 
AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: | |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: `-FunctionDecl {{[^ ]+}} <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> CoherenceHint 'unsigned int' +// AST-NEXT: |-ParmVarDecl {{[^ ]+}} <> NumCoherenceHintBitsFromLSB 'unsigned int' +// AST-NEXT: |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 359 +// AST-NEXT: `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %[[NOP:[^ ]+]]) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %[[NOP]], i32 241, i32 3) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7) + +// DXIL: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP:[^ ]+]], i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP]], i32 242, i32 7) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl new file mode 100644 index 0000000000..1e947b2296 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl @@ -0,0 +1,75 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: | |-CXXRecordDecl {{[^ ]+}} <> implicit referenced class HitObject definition +// AST-NEXT: | | |-FinalAttr {{[^ ]+}} <> Implicit final +// AST-NEXT: | | |-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST-NEXT: | | |-HLSLHitObjectAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | |-FieldDecl {{[^ ]+}} <> implicit h 'int' +// AST-NEXT: | | |-CXXConstructorDecl {{[^ ]+}} <> used HitObject 'void ()' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-HLSLCXXOverloadAttr {{[^ ]+}} <> Implicit + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeMiss +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMissShaderIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRay +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeMiss 'TResult (TRayFlags, TMissShaderIndex, TRay) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'TRayFlags' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'TMissShaderIndex' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Ray 'TRay' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeMiss 'dx::HitObject (unsigned int, unsigned int, RayDesc)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MakeMiss 'unsigned int' +// 
AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 387 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> MakeNop +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit MakeNop 'TResult () const' static +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeNop 'dx::HitObject ()' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 358 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: %{{[^ ]+}} = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 1, %struct.RayDesc* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %{{[^ ]+}}, i32 0, i32 2, %struct.RayDesc* %{{[^ ]+}}) + +// Expect HitObject_Make* calls with identical parameters to be folded. 
+// DXIL: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NOT: {{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) +// DXIL-NOT: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 1 +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 0, i32 2, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0x3FA99999A0000000, float 1.000000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + +void Use(in dx::HitObject hit) { + dx::MaybeReorderThread(hit); +} + +[shader("raygeneration")] +void main() { + dx::HitObject nop; + Use(nop); + + dx::HitObject nop2 = dx::HitObject::MakeNop(); + Use(nop2); + + RayDesc ray = {{0,0,0}, {0,0,1}, 0.05, 1000.0}; + dx::HitObject miss = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss); + + dx::HitObject miss2 = dx::HitObject::MakeMiss(0, 1, ray); + Use(miss2); + + dx::HitObject miss3 = dx::HitObject::MakeMiss(0, 2, ray); + Use(miss3); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg new file mode 100644 index 0000000000..ba86568f9a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/lit.local.cfg @@ -0,0 +1 @@ +config.unsupported = 'dxil-1-9' not in config.available_features diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl 
b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl new file mode 100644 index 0000000000..de79a2f481 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/allocateRayQuery2.hlsl @@ -0,0 +1,23 @@ +// REQUIRES: dxil-1-9 +// RUN: %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -T lib_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + +// RUN: %dxc -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -T vs_6_9 -fcgl %s | FileCheck -check-prefix=FCGL %s + + +RaytracingAccelerationStructure RTAS; +[shader("vertex")] +void main(RayDesc rayDesc : RAYDESC) { + + // CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1) + RayQuery rayQuery1; + + rayQuery1.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + // FCGL: call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0) + RayQuery rayQuery2; + rayQuery2.TraceRayInline(RTAS, 0, 2, rayDesc); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl new file mode 100644 index 0000000000..8bc7b9e73d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl @@ -0,0 +1,322 @@ +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s -check-prefixes=CHECK,F5 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,B7 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s -check-prefixes=CHECK,L9 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=17 %s | FileCheck %s -check-prefixes=CHECK,D17 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=256 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,H256 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=int16_t -DNUM=1024 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,S1024 + +// A test to verify that declarations of 
longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +// CHECK: %struct.LongVec = type { <4 x float>, <[[NUM:[0-9]*]] x [[STY:[a-z0-9]*]]> } +struct LongVec { + float4 f; + vector vec; +}; + +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; + +// Just some dummies to capture the types and mangles. +// CHECK: @"\01?dummy@@3[[MNG:F|M|N|_N|_K|\$f16@]]A" = external addrspace(3) global [[STY]] +groupshared TYPE dummy; + +// Use the first groupshared to establish mangles and sizes +// F5-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:M]]$[[VS:04]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// B7-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_N]]$[[VS:06]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// L9-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_K]]$[[VS:08]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// D17-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:N]]$[[VS:0BB@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// H256-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:\$f16@]]$[[VS:0BAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// S1024-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:F]]$[[VS:0EAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +groupshared vector gs_vec; + +// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$[[VS]]@@A" = external addrspace(3) global [10 x <[[NUM]] x [[STY]]>] +groupshared vector gs_vec_arr[10]; +// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec +groupshared LongVec gs_vec_rec; +// CHECK-DAG: @"\01?gs_vec_sub@@3ULongVecSub@@A" = external addrspace(3) global %struct.LongVecSub +groupshared LongVecSub gs_vec_sub; +// CHECK-DAG: @"\01?gs_vec_tpl@@3U?$LongVecTpl@$[[VS]]@@A" = external addrspace(3) global %"struct.LongVecTpl<[[NUM]]>" +groupshared LongVecTpl gs_vec_tpl; + +// CHECK-DAG: @static_vec = internal global <[[NUM]] x [[STY]]> +static vector static_vec; 
+// CHECK-DAG: @static_vec_arr = internal global [10 x <[[NUM]] x [[STY]]>] zeroinitializer +static vector static_vec_arr[10]; +// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec +static LongVec static_vec_rec; +// CHECK-DAG: @static_vec_sub = internal global %struct.LongVecSub +static LongVecSub static_vec_sub; +// CHECK-DAG: @static_vec_tpl = internal global %"struct.LongVecTpl<[[NUM]]>" +static LongVecTpl static_vec_tpl; + +// CHECK: define [[RTY:[a-z0-9]*]] @"\01?getVal@@YA[[MNG]][[MNG]]@Z"([[RTY]] {{.*}}%t) +export TYPE getVal(TYPE t) {TYPE ret = dummy; dummy = t; return ret;} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_param_passthru +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@V1@@Z"(<[[NUM]] x [[RTY]]> %vec1) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_param_passthru(vector vec1) { + return vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_arr_passthru +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@Y09V1@@Z"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result, [10 x <[[NUM]] x [[STY]]>]* %vec) +// CHECK: ret void +export vector lv_param_arr_passthru(vector vec[10])[10] { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_rec_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_sub_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_sub_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_tpl_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_tpl_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out +// 
CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@AIAV1@@Z"(<[[NUM]] x [[RTY]]> %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_in_out(in vector vec1, out vector vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_sub@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_sub(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_tpl@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_tpl(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + + +// CHECK-LABEL: define void @"\01?lv_param_inout +// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$[[VS]]@@0@Z"(<[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_inout(inout vector vec1, inout vector vec2) { + vector tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void 
lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_sub@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_sub(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_tpl@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_tpl(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_global_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret void +export void lv_global_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + static_vec = vec; + static_vec_arr = arr; + static_vec_rec = rec; + static_vec_sub = sub; + static_vec_tpl = tpl; +} + +// CHECK-LABEL: define void @"\01?lv_gs_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret void +export void lv_gs_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + gs_vec = vec; + gs_vec_arr = arr; + gs_vec_rec = sub; + gs_vec_tpl = tpl; +} + +// CHECK: define 
<[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_global_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_global_ret() { + return static_vec; +} + +// CHECK-LABEL: define void @"\01?lv_global_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_global_arr_ret()[10] { + return static_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_global_rec_ret() { + return static_vec_rec; +} + +// CHECK-LABEL: define void @"\01?lv_global_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecSub lv_global_sub_ret() { + return static_vec_sub; +} + +// CHECK-LABEL: define void @"\01?lv_global_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecTpl lv_global_tpl_ret() { + return static_vec_tpl; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_gs_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_gs_ret() { + return gs_vec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_gs_arr_ret()[10] { + return gs_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_gs_rec_ret() { 
+ return gs_vec_rec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecSub lv_gs_sub_ret() { + return gs_vec_sub; +} + +// CHECK-LABEL: define void @"\01?lv_gs_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecTpl lv_gs_tpl_ret() { + return gs_vec_tpl; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_splat +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_splat(TYPE scalar) { + vector ret = scalar; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@XZ"() +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist() { + vector ret = {1, 2, 3, 4, 5, 6}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@@Z"(<3 x [[RTY]]> %vec) +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist_vec(vector vec) { + vector ret = {vec, 4.0, 5.0, 6.0}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_vec_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@0@Z"(<3 x [[RTY]]> %vec1, <3 x [[RTY]]> %vec2) +// CHECK: ret <6 x [[RTY]]> +export vector lv_vec_vec(vector vec1, vector vec2) { + vector ret = {vec1, vec2}; + return ret; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_array_cast +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@Y[[VS]][[MNG]]@Z"({{\[}}[[NUM]] x [[STY]]]* %arr) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_array_cast(TYPE arr[NUM]) { + vector ret = (vector)arr; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_ctor +// CHECK-SAME: 
@@YA?AV?$vector@[[MNG]]$05@@[[MNG]]@Z"([[RTY]] {{.*}}%s) +// CHECK: ret <6 x [[RTY]]> +export vector lv_ctor(TYPE s) { + vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); + return ret; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl new file mode 100644 index 0000000000..935ec3cc13 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-field-di.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=8 | FileCheck %s --check-prefix=CHECK-LONG +// RUN: %dxc -Zi -Qembed_debug -T lib_6_9 %s -DNUM=4 | FileCheck %s --check-prefix=CHECK-SHORT + +// Test debug info for short and long vector types + +RWByteAddressBuffer buf; + +export vector lv_global_arr_ret() { + vector d = buf.Load >(0); + return d; +} + +// CHECK-LONG: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 256, align: 32, elements: ![[ELEMDI:[^ ]+]], +// CHECK-LONG: ![[ELEMDI]] = !{![[C0:[^ ]+]], ![[C1:[^ ]+]], ![[C2:[^ ]+]], ![[C3:[^ ]+]], ![[C4:[^ ]+]], ![[C5:[^ ]+]], ![[C6:[^ ]+]], ![[C7:[^ ]+]]} +// CHECK-LONG: ![[C0]] = !DIDerivedType(tag: DW_TAG_member, name: "c0", scope: !{{[^ ]+}} file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +// CHECK-LONG: ![[C1]] = !DIDerivedType(tag: DW_TAG_member, name: "c1", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic) +// CHECK-LONG: ![[C2]] = !DIDerivedType(tag: DW_TAG_member, name: "c2", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic) +// CHECK-LONG: ![[C3]] = !DIDerivedType(tag: DW_TAG_member, name: "c3", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic) +// 
CHECK-LONG: ![[C4]] = !DIDerivedType(tag: DW_TAG_member, name: "c4", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 128, flags: DIFlagPublic) +// CHECK-LONG: ![[C5]] = !DIDerivedType(tag: DW_TAG_member, name: "c5", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 160, flags: DIFlagPublic) +// CHECK-LONG: ![[C6]] = !DIDerivedType(tag: DW_TAG_member, name: "c6", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 192, flags: DIFlagPublic) +// CHECK-LONG: ![[C7]] = !DIDerivedType(tag: DW_TAG_member, name: "c7", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 224, flags: DIFlagPublic) +// CHECK-LONG: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]]) + +// CHECK-SHORT: ![[TYDI:[^ ]+]] = !DICompositeType(tag: DW_TAG_class_type, name: "vector", file: !{{[^ ]+}}, size: 128, align: 32, elements: ![[ELEMDI:[^ ]+]], +// CHECK-SHORT: ![[ELEMDI]] = !{![[X:[^ ]+]], ![[Y:[^ ]+]], ![[Z:[^ ]+]], ![[W:[^ ]+]]} +// CHECK-SHORT: ![[X]] = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY:[^ ]+]], size: 32, align: 32, flags: DIFlagPublic) +// CHECK-SHORT: ![[BASETY]] = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) +// CHECK-SHORT: ![[Y]] = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 32, flags: DIFlagPublic) +// CHECK-SHORT: ![[Z]] = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 64, flags: DIFlagPublic) +// CHECK-SHORT: ![[W]] = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, baseType: ![[BASETY]], size: 32, align: 32, offset: 96, flags: DIFlagPublic) +// 
CHECK-SHORT: !{{[^ ]+}} = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", scope: !{{[^ ]+}}, file: !{{[^ ]+}}, line: 9, type: ![[TYDI]]) \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..0b7f0d6b2f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -0,0 +1,394 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=2 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s + +// Test vector-enabled non-trivial intrinsics that take parameters of various types. + +RWByteAddressBuffer buf; +RWByteAddressBuffer ibuf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: 
[[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle {{%.*}}, %dx.types.ResourceProperties { i32 4107, 
i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = ibuf.Load >(0); + vector sVec2 = ibuf.Load >(512); + vector sVec3 = ibuf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = ibuf.Load >(1025); + vector usVec2 = ibuf.Load >(1536); + vector usVec3 = ibuf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] 
@dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = ibuf.Load >(2049); + vector iVec2 = ibuf.Load >(2560); + vector iVec3 = ibuf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = ibuf.Load >(3073); + vector uiVec2 = ibuf.Load >(3584); + vector uiVec3 = ibuf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = ibuf.Load >(4097); + vector lVec2 = ibuf.Load >(4608); + vector lVec3 = ibuf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = ibuf.Load >(5121); + vector ulVec2 = ibuf.Load >(5632); + vector ulVec3 = ibuf.Load >(6144); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 35, <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 36, <[[NUM]] x half> [[tmp]], <[[NUM]] x half> [[hvec3]]) ; FMin(a,b) + vector hRes = clamp(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 35, <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 36, <[[NUM]] x float> [[tmp]], <[[NUM]] x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 35, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) ; FMax(a,b) + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 36, <[[NUM]] x double> [[tmp]], <[[NUM]] x double> [[dvec3]]) ; FMin(a,b) + vector dRes = clamp(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 38, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[svec3]]) ; 
IMin(a,b) + vector sRes = clamp(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 39, <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 40, <[[NUM]] x i16> [[tmp]], <[[NUM]] x i16> [[usvec3]]) ; UMin(a,b) + vector usRes = clamp(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 38, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[ivec3]]) ; IMin(a,b) + vector iRes = clamp(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 39, <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 40, <[[NUM]] x i32> [[tmp]], <[[NUM]] x i32> [[uivec3]]) ; UMin(a,b) + vector uiRes = clamp(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) ; IMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 38, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[lvec3]]) ; IMin(a,b) + vector lRes = clamp(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 39, <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) ; UMax(a,b) + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 40, <[[NUM]] x i64> [[tmp]], <[[NUM]] x i64> [[ulvec3]]) ; UMin(a,b) + vector ulRes = clamp(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // 
CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], [[hvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x half> zeroinitializer, <[[NUM]] x half> [[fvec2]], [[fvec1]] + // CHECK: select <[[NUM]] x i1> [[tmp]], <[[NUM]] x float> zeroinitializer, <[[NUM]] x float> [[hvec1]], @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp]]) ; Exp(value) + hRes += exp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fmul fast <[[NUM]] x float> [[fvec1]], @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec1]]) ; Log(value) + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[hvec2]], [[hvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 7, <[[NUM]] x half> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x half> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x half> [[mul]], [[sub]] + hRes += smoothstep(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x float> [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 7, <[[NUM]] x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], , [[mul]] + // 
CHECK: [[mul:%.*]] = fmul fast <[[NUM]] x float> [[sat]], [[sat]] + // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fmul fast <[[NUM]] x half> [[hvec2]], [[fvec2]], [[hvec3]], [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <[[NUM]] x float> [[fvec1]] to <[[NUM]] x i32> + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], [[add]], [[shr]] to <[[NUM]] x float> + // CHECK: [[sel:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[i2f]], <[[NUM]] x float> zeroinitializer + // CHECK: [[and:%.*]] = and <[[NUM]] x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = fsub fast <[[NUM]] x float> [[fvec3]], [[fvec2]] + // CHECK: fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + fRes += lerp(fVec2, fVec3, fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x half> , [[hvec1]] + hRes += rcp(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: fdiv fast <[[NUM]] x float> , [[fvec1]] + fRes += rcp(fVec1); + + vector signs = 1; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs 
*= sign(hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(fVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = fcmp fast ogt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[lt:%.*]] = fcmp fast olt <[[NUM]] x double> [[dvec1]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(dVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i16> [[usvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: [[sub:%.*]] = sub nsw <[[NUM]] x i32> 
[[igt]], [[ilt]] + signs *= sign(iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i32> [[uivec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[gt:%.*]] = icmp sgt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <[[NUM]] x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <[[NUM]] x i1> [[gt]] to <[[NUM]] x i32> + // CHECK: [[ilt:%.*]] = zext <[[NUM]] x i1> [[lt]] to <[[NUM]] x i32> + // CHECK: sub nsw <[[NUM]] x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[cmp:%.*]] = icmp ne <[[NUM]] x i64> [[ulvec2]], zeroinitializer + // CHECK: zext <[[NUM]] x i1> [[cmp]] to <[[NUM]] x i32> + signs *= sign(ulVec2); + + iRes += signs; + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i16> [[svec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i16> [[svec1]], zeroinitializer + // CHECK: or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + sRes += or(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i16> [[svec3]], zeroinitializer + // CHECK: and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + sRes += and(sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: select <[[NUM]] x i1> [[bvec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]] + sRes += select(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + + ibuf.Store >(0, sRes); + ibuf.Store >(1024, usRes); + ibuf.Store >(2048, iRes); + ibuf.Store >(3072, uiRes); + ibuf.Store >(4096, lRes); + ibuf.Store >(5120, ulRes); +} diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl new file mode 100644 index 0000000000..12955c87f9 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl @@ -0,0 +1,464 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=2 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=5 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=3 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=9 %s | FileCheck %s + +// Test relevant operators on an assortment of bool vector sizes with 6.9 native vectors. +// Bools have a different representation in memory and a smaller set of interesting ops. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] +RWStructuredBuffer< bool > buf; + +groupshared vector gs_vec1, gs_vec2; +groupshared vector gs_vec3; + + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], bool scales[10]) { + + // Another trick to capture the size.
+ // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.i32 [[res]], 0 + // CHECK: [[bscl:%[0-9]*]] = icmp ne i32 [[scl]], 0 + bool scalar = buf.Load(NUM); + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add9]] + // CHECK: [[bvec9:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec9]], zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + things[0] = things[9]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i1> undef, i1 [[bscl]], i32 0 + // CHECK: [[res:%[0-9]*]] = shufflevector <[[NUM]] x i1> [[spt]], <[[NUM]] x i1> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[res]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + things[5] = scalar; + +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[svec0:%[0-9]*]] = sext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[bsvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[svec0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bsvec0]] to <[[NUM]] x i32> + res[0] = -things[0]; + + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: 
[[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + res[1] = +things[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[res2:%[0-9]*]] = add nuw nsw <[[NUM]] x i32> [[vec2]], [[vec1]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[1] + things[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = sub nsw <[[NUM]] x i32> [[vec2]], [[vec3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[2] - things[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = mul nuw nsw <[[NUM]] x i32> [[vec4]], [[vec3]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = things[3] * things[4]; + + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec4]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: 
[[res5:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = things[4] / things[5]; + + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = {{[ufs]?rem( fast)?}} <[[NUM]] x i32> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; + + // Stores into res[]. Previous were for things[] inout. + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
+// CHECK-LABEL: define void @"\01?scarithmetic +export vector scarithmetic(inout vector things[10], bool scales[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load i32, i32* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = 
shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = add <[[NUM]] x i32> [[spt0]], [[vec0]] + // CHECK: [[bres0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32> + res[0] = things[0] + scales[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load i32, i32* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = sub <[[NUM]] x i32> [[vec1]], [[spt1]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = things[1] - scales[1]; + + + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load i32, i32* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt2]], [[vec2]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[2] * scales[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp 
ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load i32, i32* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec3]], [[spt3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load i32, i32* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = add <[[NUM]] x i32> [[spt4]], [[vec4]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load i32, i32* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], 
zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sub <[[NUM]] x i32> [[spt5]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load i32, i32* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt6]], [[vec6]] + // CHECK: [[bres6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res6]], zeroinitializer + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = 
getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. +// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector logic(vector truth[10], vector consequences[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = 
truth[1] || truth[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // MORE STUFF + + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[bvec0]], [[bvec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = icmp {{u?}}ne <[[NUM]] x i1> [[bvec1]], [[bvec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = icmp {{[osu]?}}lt <[[NUM]] x i1> [[bvec2]], [[bvec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x 
i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres7:%[0-9]*]] = icmp {{[osu]]?}}gt <[[NUM]] x i1> [[bvec3]], [[bvec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres8:%[0-9]*]] = icmp {{[osu]]?}}le <[[NUM]] x i1> [[bvec4]], [[bvec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres9:%[0-9]*]] = icmp {{[osu]?}}ge <[[NUM]] x i1> [[bvec5]], [[bvec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x 
<[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i, bool val)[10] { + vector res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x i32>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> zeroinitializer, <[[NUM]] x i32>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> , <[[NUM]] x i32>* [[res2]] + res[Ix] = true; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bthg0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg0]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x 
i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg0]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg0]], <[[NUM]] x i32>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 %i + // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[addi]] + // CHECK: [[bthgi:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thgi]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%[0-9]*]] = zext <[[NUM]] x i1> [[bthgi]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thgi]], <[[NUM]] x i32>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bthg2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg2]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg2]], <[[NUM]] x i32>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; + +} + +// Test bit twiddling operators. 
+// CHECK-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[10]) { + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] = things[2] | things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec4]], [[bvec3]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x 
[[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] = things[3] & things[4]; + + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = or <[[NUM]] x i1> [[bvec6]], [[bvec4]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] |= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[bvec7:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec7]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = and <[[NUM]] x i1> [[bvec7]], [[bvec5]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x 
[[TYPE]]>* [[add5]] + things[5] &= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[bvec8:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec8]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = xor <[[NUM]] x i1> [[bvec6]], [[bvec8]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] ^= things[8]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl new file mode 100644 index 0000000000..0a115bd709 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-cs.hlsl @@ -0,0 +1,719 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DNUM=33 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. 
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DNUM=12 -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on an assortment vector sizes and types with 6.9 native vectors. +// Tests in a CS environment where vector operations were previously disallowed to confirm that they are retained. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK-DAG: %dx.types.ResRet.[[TY:v[0-9]*[a-z][0-9]*]] = type { <[[NUM:[0-9]*]] x [[TYPE:[a-z_0-9]*]]> +// CHECK-DAG: %dx.types.ResRet.[[STY:[a-z][0-9]*]] = type { [[STYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> + +void assignments(inout vector things[11], TYPE scales[10]); +vector arithmetic(inout vector things[11])[11]; +vector scarithmetic(vector things[11], TYPE scales[10])[11]; +vector logic(vector truth[10], vector consequences[11])[10]; +vector index(vector things[11], int i)[11]; +void bittwiddlers(inout vector things[13]); + +struct Viface { + vector values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + vector values[10]; +}; + +struct Binface { + vector values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void 
main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + // CHECK: [[scratch1:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[scratch2:%.*]] = alloca [11 x <[[NUM]] x [[TYPE]]>] + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + // Assign scalar offsets to capture the expected values. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 2, i32 0, <13 x i32> + Offsets[2] = vector(sizeof(vector)*0, + sizeof(vector)*1, + sizeof(vector)*2, + sizeof(vector)*3, + sizeof(vector)*4, + sizeof(vector)*5, + sizeof(vector)*6, + sizeof(vector)*7, + sizeof(vector)*8, + sizeof(vector)*9, + sizeof(vector)*10, + sizeof(vector)*11, + sizeof(vector)*12); + + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+void assignments(inout vector things[11], TYPE scales[10]) { + + // CHECK: [[VcIx:%.*]] = add i32 [[InIx1]], 1 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, 
%dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + 
things[0] = scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec1]] + things[1] += things[5]; + + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + things[2] -= things[6]; + + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec7]], [[vec3]] + things[3] *= things[7]; + + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + things[4] /= things[8]; + +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = (vector)things[9]; + vector f5 = (vector)things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + things[6] += scales[1]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + things[7] -= scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: 
[[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + things[8] *= scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 
[[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + +} + +// Test arithmetic operators. +vector arithmetic(inout vector things[11])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 
[[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // NOINT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> <[[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000),.*}}>, [[vec0]] + // INT: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> zeroinitializer, [[vec0]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] 
= things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]], <[[TYPE]] [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]], <[[TYPE]] [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]], <[[TYPE]] [[NEG1]] + res[10] = --things[10]; + + // Things[] input gets all the result values since pre/post inc/decrements don't change the end result. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // res1 is just vec0 since it was just the unary + operator. + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // res[] input gets either the original or the preincremented value. 
+ // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[vec7]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[vec8]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. +vector scarithmetic(vector things[11], TYPE scales[10])[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 
[[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferLoad.[[STY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[SOFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> 
zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 
[[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + + return res; +} + +// Test logic operators. +// Only permissable in pre-HLSL2021 +vector logic(vector truth[10], vector consequences[11])[10] { + vector res[10]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i32 [[IALN]]) + // CHECK: [[ivec0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i32 [[IALN]]) + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i32 [[IALN]]) + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i32 [[IALN]]) + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i32 [[IALN]]) + // CHECK: [[ivec4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] 
[[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i32 [[IALN]]) + // CHECK: [[ivec5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 
[[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec1]], zeroinitializer + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec4]], zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[ivec5]], zeroinitializer + // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], <[[NUM]] x i32> [[res0]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], <[[NUM]] x i32> [[res1]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], <[[NUM]] x i32> [[res2]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], <[[NUM]] x i32> [[res3]], i32 4) + // CHECK: 
call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], <[[NUM]] x i32> [[res4]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], <[[NUM]] x i32> [[res5]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], <[[NUM]] x i32> [[res6]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], <[[NUM]] x i32> [[res7]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], <[[NUM]] x i32> [[res8]], i32 4) + // CHECK: call void @dx.op.rawBufferVectorStore.[[ITY]](i32 304, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], <[[NUM]] x i32> [[res9]], i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +vector index(vector things[11], int i)[11] { + vector res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[VecIx:%.*]] = add i32 [[InIx1]], 5 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF0]], i32 [[ALN]]) + // CHECK: [[vec0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, 
i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF1]], i32 [[ALN]]) + // CHECK: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec1]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF2]], i32 [[ALN]]) + // CHECK: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF3]], i32 [[ALN]]) + // CHECK: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec3]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF4]], i32 [[ALN]]) + // CHECK: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec4]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle 
[[InHdl]], i32 [[VecIx]], i32 [[OFF5]], i32 [[ALN]]) + // CHECK: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec5]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF6]], i32 [[ALN]]) + // CHECK: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec6]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF7]], i32 [[ALN]]) + // CHECK: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF8]], i32 [[ALN]]) + // CHECK: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF9]], i32 [[ALN]]) + // CHECK: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] 
[[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec9]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VecIx]], i32 [[OFF10]], i32 [[ALN]]) + // CHECK: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec10]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + res[0] = 0; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 [[Ix]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[POS1]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]],{{[^>]*}}>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch2]], i32 0, i32 [[Ix]] + // CHECK: [[ldix:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x 
[[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[ldix]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[4] = things[i]; + + + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec2]], <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> <[[TYPE]] [[TWO]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[vec0]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[ldix]], i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[vec2]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // 
CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* [[scratch1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[ld]], i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+void bittwiddlers(inout vector things[13]) { + // INT: [[VcIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], i32 [[ALN]]) + // INT: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], i32 [[ALN]]) + // INT: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], i32 [[ALN]]) + // INT: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], i32 [[ALN]]) + // INT: [[vec4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], i32 [[ALN]]) + // INT: [[vec5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], i32 [[ALN]]) + // INT: [[vec6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], i32 [[ALN]]) + // INT: [[vec7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], 
i32 [[ALN]]) + // INT: [[vec8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], i32 [[ALN]]) + // INT: [[vec9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], i32 [[ALN]]) + // INT: [[vec10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], i32 [[ALN]]) + // INT: [[vec11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], i32 [[ALN]]) + // INT: [[vec12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]] + // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + things[6] |= 
things[8]; + + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + things[7] &= things[9]; + + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF0]], <[[NUM]] x [[TYPE]]> [[res0]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF1]], <[[NUM]] x [[TYPE]]> [[res1]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF2]], <[[NUM]] x [[TYPE]]> [[res2]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF3]], <[[NUM]] x [[TYPE]]> [[res3]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF4]], <[[NUM]] x [[TYPE]]> [[res4]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF5]], <[[NUM]] x [[TYPE]]> [[res5]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF6]], <[[NUM]] x [[TYPE]]> [[res6]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF7]], <[[NUM]] x [[TYPE]]> [[res7]], i32 [[ALN]]) + // INT: call void 
@dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF8]], <[[NUM]] x [[TYPE]]> [[res8]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF9]], <[[NUM]] x [[TYPE]]> [[res9]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF10]], <[[NUM]] x [[TYPE]]> [[res10]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF11]], <[[NUM]] x [[TYPE]]> [[vec11]], i32 [[ALN]]) + // INT: call void @dx.op.rawBufferVectorStore.[[TY]](i32 304, %dx.types.Handle [[InHdl]], i32 [[VcIx]], i32 [[OFF12]], <[[NUM]] x [[TYPE]]> [[vec12]], i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl new file mode 100644 index 0000000000..b749a3b255 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl @@ -0,0 +1,73 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=uint -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,UNSIG +// RUN: %dxc -T lib_6_9 -DTYPE=int64_t -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,SIG +// RUN: %dxc -T lib_6_9 -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,UNSIG + +// Test bitwise operators on an assortment vector sizes and integer types with 6.9 native vectors. + +// Test bit twiddling operators. 
+// CHECK-LABEL: define void @"\01?bittwiddlers +// CHECK-SAME: ([11 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]* +export void bittwiddlers(inout vector things[11]) { + // CHECK: [[adr1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1, + // CHECK: [[adr0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + + // CHECK: [[adr3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // 
CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec6]], <[[TYPE]] + // CHECK: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[vec5]], [[shv6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[vec6]], [[shv7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec8]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec9]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec8]], [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl new file mode 100644 index 0000000000..8b12b96c80 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-scalars.hlsl @@ -0,0 +1,342 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Test relevant operators on an assortment bool vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +struct Interface { + TYPE assigned[10]; + TYPE arithmeticked[11]; + bool logicked[10]; + TYPE indexed[10]; + TYPE scales[10]; +}; + +#if 0 +// Requires vector loading support. Enable when available. +RWStructuredBuffer Input; +RWStructuredBuffer Output; + +TYPE g_val; + +[shader("compute")] +[numthreads(8,1,1)] +void main(uint GI : SV_GroupIndex) { + assignments(Output[GI].assigned, Input[GI].scales); + Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); + Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); + Output[GI].indexed = index(Input[GI].indexed, GI, g_val); +} +#endif + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. 
+// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{(8|4|2)}}) + // CHECK: [[res0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val1]], [[val5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast| nsw)?]] [[TYPE]] [[val2]], [[val6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast| nsw)?]] [[TYPE]] [[val3]], [[val7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + 
things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast| nsw)?]] [[TYPE]] [[val4]], [[val8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] +#ifdef DBL + // DBL: [[fvec9:%.*]] = fptrunc double [[val9]] to float + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // DBL: [[fres5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] float [[fvec5]], [[fvec9]] + // DBL: [[res5:%.*]] = fpext float [[fres5]] to double + float f9 = things[9]; + float f5 = things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast| nsw)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res1:%.*]] = [[SUB]] [[TYPE]] {{-?(0|0\.0*e\+0*|0xH8000)}}, [[res0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + + // DBL: [[fvec5:%.*]] = fptrunc double [[val5]] to float + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] +#ifdef DBL + // DBL: [[fvec6:%.*]] = fptrunc double [[val6]] to float + // DBL: [[fres6:%.*]] = [[REM]] float [[fvec5]], 
[[fvec6]] + // DBL: [[res6:%.*]] = fpext float [[fres6]] to double + res[6] = (float)things[5] % (float)things[6]; +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[val7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD:f?add( fast| nsw)?]] [[TYPE]] [[val7]], {{(1|1\.?0*e?\+?0*|0xH3C00)}} + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[val8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[val8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[val9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[val9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[val10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[val10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // 
CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // CHECK: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], TYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1 + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[val2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[val3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne i32 [[val4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne i32 [[val5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[val0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast| nsw)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr 
inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[val6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x 
[[TYPE]]] + TYPE res[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 %i + // CHECK: store [[TYPE]] {{(1|1\.?0*e?\+?0*|0xH3C00)}}, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[TYPE]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl new file mode 100644 index 0000000000..cb2fd5f781 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-shortcircuit.hlsl @@ -0,0 +1,57 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 %s | FileCheck %s --check-prefix=NOBR + +// Test that no short-circuiting takes place for logic ops with native vectors. +// First run verifies that side effects result in stores. +// Second runline just makes sure there are no branches nor phis at all. + +// NOBR-NOT: br i1 +// NOBR-NOT: = phi + +export int4 logic(inout bool4 truth[5], inout int4 consequences[4]) { + // CHECK: [[adr0:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[add:%.*]] = add <4 x i32> [[vec1]], + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[bres3:%.*]] = or <4 x i1> [[bvec1]], [[bvec0]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext <4 x i1> [[bres3]] to <4 x i32> + // CHECK: store <4 x i32> [[res3]], <4 x i32>* [[adr3]] + truth[3] = truth[0] || consequences[1]++; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <4 x i32>, <4 x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <4 x i32> [[vec1]], zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <4 x i32>, <4 x i32>* [[adr0]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec0]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* 
[[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <4 x i32> [[vec0]], zeroinitializer + // CHECK: [[bres4:%.*]] = and <4 x i1> [[bvec0]], [[bvec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <4 x i1> [[bres4]] to <4 x i32> + // CHECK: store <4 x i32> [[res4]], <4 x i32>* [[adr4]] + truth[4] = truth[1] && consequences[0]--; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [5 x <4 x i32>], [5 x <4 x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[bcond:%.*]] = icmp ne <4 x i32> [[vec2]], zeroinitializer + // CHECK: [[adr2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <4 x i32>, <4 x i32>* [[adr2]] + // CHECK: [[add:%.*]] = add <4 x i32> %25, + // CHECK: store <4 x i32> [[add]], <4 x i32>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <4 x i32>, <4 x i32>* [[adr3]] + // CHECK: [[sub:%.*]] = add <4 x i32> [[vec3]], + // CHECK: store <4 x i32> [[sub]], <4 x i32>* [[adr3]] + // CHECK: [[res:%.*]] = select <4 x i1> [[bcond]], <4 x i32> [[vec2]], <4 x i32> [[vec3]] + int4 res = truth[2] ? 
consequences[2]++ : consequences[3]--; + + // CHECK: ret <4 x i32> %30 + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl new file mode 100644 index 0000000000..ca239a5b22 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s-cs.hlsl @@ -0,0 +1,680 @@ +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Scalar variants to confirm they match. +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=double -DDBL %s | FileCheck %s +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -DSCL -HV 2018 -T cs_6_9 -DTYPE=int16_t -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Linking tests. 
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -Fo %t.1 %s +// RUN: %dxl -T cs_6_9 %t.1 | FileCheck %s --check-prefixes=CHECK,NODBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL -Fo %t.2 %s +// RUN: %dxl -T cs_6_9 %t.2 | FileCheck %s --check-prefixes=CHECK,DBL,NOINT +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DINT -enable-16bit-types -Fo %t.3 %s +// RUN: %dxl -T cs_6_9 %t.3 | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG + +// Test relevant operators on vec1s in a 6.9 compute shader to ensure they continue to be treated as scalars. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9]*]] = type { [[TYPE:[a-z0-9_]*]] +// CHECK-DAG: %dx.types.ResRet.[[ITY:i32]] = type { i32 + +#ifdef SCL +#define VTYPE TYPE +#else +#define VTYPE vector +#endif + +void assignments(inout VTYPE things[11], TYPE scales[10]); +VTYPE arithmetic(inout VTYPE things[11])[11]; +VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11]; +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10]; +VTYPE index(VTYPE things[11], int i)[11]; +void bittwiddlers(inout VTYPE things[13]); + +struct Viface { + VTYPE values[11]; +}; + +struct Siface { + TYPE values[10]; +}; + +struct Liface { + bool1 values[10]; +}; + +struct Binface { + VTYPE values[13]; +}; + +RWStructuredBuffer Input : register(u11); +RWStructuredBuffer Output : register(u12); +RWStructuredBuffer Scales : register(u13); +RWStructuredBuffer Truths : register(u14); +RWStructuredBuffer Bits : register(u15); +RWStructuredBuffer > Offsets : register(u16); + +[shader("compute")] +[numthreads(8,1,1)] +// CHECK-LABEL: define void @main +void main(uint3 GID : SV_GroupThreadID) { + + // CHECK-DAG: [[Input:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 11, i32 11, i32 0, i8 1 }, i32 11 + // CHECK-DAG: [[Output:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 12, i32 12, i32 0, i8 1 }, i32 12 + // CHECK-DAG: [[Scales:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 13, i32 13, i32 0, i8 1 }, i32 13 + // CHECK-DAG: [[Truths:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 14, i32 14, i32 0, i8 1 }, i32 14 + // INT-DAG: [[Bits:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 15, i32 15, i32 0, i8 1 }, i32 15 + + // CHECK: [[InIx1:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0) + // CHECK: [[InIx2:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1) + // CHECK: [[OutIx:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 2) + + uint InIx1 = GID[0]; + uint InIx2 = GID[1]; + uint OutIx = GID[2]; + + // Assign vector offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 0, i32 0, <13 x i32> + Offsets[0] = vector(sizeof(TYPE)*0, + sizeof(TYPE)*1, + sizeof(TYPE)*2, + sizeof(TYPE)*3, + sizeof(TYPE)*4, + sizeof(TYPE)*5, + sizeof(TYPE)*6, + sizeof(TYPE)*7, + sizeof(TYPE)*8, + sizeof(TYPE)*9, + sizeof(TYPE)*10, + sizeof(TYPE)*11, + sizeof(TYPE)*12); + + // Assign boolean offsets to capture the expected values. + // CHECK: call void @dx.op.rawBufferVectorStore.v13i32(i32 304, %dx.types.Handle {{%.*}}, i32 1, i32 0, <13 x i32> + Offsets[1] = vector(sizeof(int)*0, + sizeof(int)*1, + sizeof(int)*2, + sizeof(int)*3, + sizeof(int)*4, + sizeof(int)*5, + sizeof(int)*6, + sizeof(int)*7, + sizeof(int)*8, + sizeof(int)*9, + sizeof(int)*10, + sizeof(TYPE),// Effectively alignof. + sizeof(int));// Effectively integer alignof. 
+ + assignments(Input[InIx1+1].values, Scales[InIx2+1].values); + Output[OutIx+2].values = arithmetic(Input[InIx1+2].values); + Output[OutIx+3].values = scarithmetic(Input[InIx1+3].values, Scales[InIx2+3].values); + Truths[OutIx+4].values = logic(Truths[InIx2+4].values, Input[InIx1+4].values); + Output[OutIx+5].values = index(Input[InIx1+5].values, InIx2+5); +#ifdef INT + bittwiddlers(Bits[InIx1+6].values); +#endif +} +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +void assignments(inout VTYPE things[11], TYPE scales[10]) { + + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 1 + + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] 
= call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[ScIx:%.*]] = add i32 [[InIx2]], 1 + // CHECK: [[ScHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // Nothing to check. Just a copy over. 
+ things[0] = scales[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ScHdl]], i32 [[ScIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]]{{( nsw)?}} [[TYPE]] [[val5]], [[val1]] + things[1] += things[5]; + + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]]{{( nsw)?}} [[TYPE]] [[val2]], [[val6]] + things[2] -= things[6]; + + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]]{{( nsw)?}} [[TYPE]] [[val7]], [[val3]] + things[3] *= things[7]; + + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]]{{( nsw)?}} [[TYPE]] [[val4]], [[val8]] + things[4] /= things[8]; + +#ifdef DBL + things[5] = 0; // Gotta give it something in any case for validation. 
+#else + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[val5]], [[val9]] + things[5] %= things[9]; +#endif + + // CHECK: [[res6:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl1]], [[val6]] + things[6] += scales[1]; + + // CHECK: [[res7:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val7]], [[scl2]] + things[7] -= scales[2]; + + // CHECK: [[res8:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl3]], [[val8]] + things[8] *= scales[3]; + + // CHECK: [[res9:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val9]], [[scl4]] + things[9] /= scales[4]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[scl0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[val10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + +} + +// Test arithmetic operators. +VTYPE arithmetic(inout VTYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 2 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 2 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + res[0] = +things[0]; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[res1:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + 
res[1] = -things[0]; + + // CHECK: [[res2:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[res3:%.*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[res4:%.*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[res5:%.*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifdef DBL + res[6] = 0; // Gotta give it something in any case for validation. +#else + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[res7:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + res[7] = things[7]++; + + // CHECK: [[res8:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + res[8] = things[8]--; + + // CHECK: [[res9:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val9]], [[POS1]] + res[9] = ++things[9]; + + // CHECK: [[res10:%.*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[val10]], [[NEG1]] + res[10] = --things[10]; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], [[TYPE]] [[val1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], [[TYPE]] [[val3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, 
%dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], [[TYPE]] [[val4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], [[TYPE]] [[val5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], [[TYPE]] [[val6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // NODBL: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // Postincrement/decrements get the original value. + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[val7]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[val8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +// Test arithmetic operators with scalars. 
+VTYPE scarithmetic(VTYPE things[11], TYPE scales[10])[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 3 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[InIx:%.*]] = add i32 [[InIx1]], 3 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[InIx]], i32 [[OFF6]], i8 1, i32 
[[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[SclIx:%.*]] = add i32 [[InIx2]], 3 + // CHECK: [[SclHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Scales]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[scl0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[scl1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[scl2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[scl3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[scl4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[scl5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[SclHdl]], i32 [[SclIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[scl6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // CHECK: [[res0:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] 
[[scl0]], [[val0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[res1:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[val1]], [[scl1]] + res[1] = things[1] - scales[1]; + + // CHECK: [[res2:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl2]], [[val2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[res3:%[0-9]*]] = [[DIV]]{{( nsw)?}} [[TYPE]] [[val3]], [[scl3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[res4:%[0-9]*]] = [[ADD]]{{( nsw)?}} [[TYPE]] [[scl4]], [[val4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[res5:%[0-9]*]] = [[SUB]]{{( nsw)?}} [[TYPE]] [[scl5]], [[val5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[res6:%[0-9]*]] = [[MUL]]{{( nsw)?}} [[TYPE]] [[scl6]], [[val6]] + res[6] = scales[6] * things[6]; + res[7] = res[8] = res[9] = res[10] = 0; + + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 
[[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + + +// Test logic operators. +// Only permissable in pre-HLSL2021 +bool1 logic(bool1 truth[10], VTYPE consequences[11])[10] { + bool1 res[10]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 4 + // CHECK: [[TruHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Truths]] + // CHECK: [[TruIx:%.*]] = add i32 [[InIx2]], 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF0]], i8 1, i32 [[IALN]]) + // CHECK: [[ival0:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF1]], i8 1, i32 [[IALN]]) + // CHECK: [[ival1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF2]], i8 1, i32 [[IALN]]) + // CHECK: [[ival2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF3]], i8 1, i32 [[IALN]]) + // CHECK: [[ival3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF4]], i8 1, i32 [[IALN]]) + // CHECK: [[ival4:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferLoad.[[ITY]](i32 139, %dx.types.Handle [[TruHdl]], i32 [[TruIx]], i32 [[BOFF5]], i8 1, i32 [[IALN]]) + 
// CHECK: [[ival5:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 4 + // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + + // CHECK: [[bres0:%.*]] = icmp eq i32 [[ival0]], 0 + // CHECK: 
[[res0:%.*]] = zext i1 [[bres0]] to i32 + res[0] = !truth[0]; + + // CHECK: [[res1:%.*]] = or i32 [[ival2]], [[ival1]] + // CHECK: [[bres1:%.*]] = icmp ne i32 [[res1]], 0 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[ival2]], 0 + // CHECK: [[bval3:%.*]] = icmp ne i32 [[ival3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[bval4:%.*]] = icmp ne i32 [[ival4]], 0 + // CHECK: [[bval5:%.*]] = icmp ne i32 [[ival5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF0]], i32 [[res0]], i32 undef, i32 
undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF1]], i32 [[res1]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF2]], i32 [[res2]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF3]], i32 [[res3]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF4]], i32 [[res4]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF5]], i32 [[res5]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF6]], i32 [[res6]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF7]], i32 [[res7]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF8]], i32 [[res8]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + // CHECK: call void @dx.op.rawBufferStore.[[ITY]](i32 140, %dx.types.Handle [[TruHdl]], i32 [[ResIx]], i32 [[BOFF9]], i32 [[res9]], i32 undef, i32 undef, i32 undef, i8 1, i32 4) + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +VTYPE index(VTYPE things[11], int i)[11] { + VTYPE res[11]; + + // CHECK: [[ResIx:%.*]] = add i32 [[OutIx]], 5 + // CHECK: [[ResHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Output]] + // CHECK: [[valIx:%.*]] = add i32 [[InIx1]], 5 
+ // CHECK: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Input]] + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1:%.*]], i32 0, i32 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF0]], i8 1, i32 [[ALN]]) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val1]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 2 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 3 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val3]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 4 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val4]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 5 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val5]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val6]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val7]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF8]], i8 1, i32 [[ALN]]) + // CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val8]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( 
inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val9]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[valIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // CHECK: store [[TYPE]] [[val10]], [[TYPE]]* [[adr]], align [[ALN]] + + // CHECK: [[Ix:%.*]] = add i32 [[InIx2]], 5 + + // CHECK: [[adr0:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2:%.*]], i32 0, i32 0 + // CHECK: store [[TYPE]] {{(0|0\.?0*e?\+?0*|0xH0000)}}, [[TYPE]]* [[adr0]], align [[ALN]] + res[0] = 0; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 [[Ix]] + // CHECK: store [[TYPE]] [[POS1]], [[TYPE]]* [[adr]] + res[i] = 1; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 2 + // CHECK: store [[TYPE]] [[TWO:(2|2\.?0*e?\+?0*|0xH4000)]], [[TYPE]]* [[adr]] + res[Ix] = 2; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[val0]], [[TYPE]]* [[adr]] + res[3] = things[0]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr1]], i32 0, i32 [[Ix]] + // CHECK: [[vali:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 4 + // CHECK: store 
[[TYPE]] [[vali]], [[TYPE]]* [[adr]] + res[4] = things[i]; + + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[val2]], [[TYPE]]* [[adr]] + res[5] = things[Ix]; + + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 0, [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 1 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF1]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF2]], [[TYPE]] [[TWO]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF3]], [[TYPE]] [[val0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF4]], [[TYPE]] [[vali]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF5]], [[TYPE]] [[val2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 6 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle 
[[ResHdl]], i32 [[ResIx]], i32 [[OFF6]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 7 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF7]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 8 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF8]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 9 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF9]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // CHECK: [[adr:%.*]] = getelementptr{{( inbounds)?}} [11 x [[TYPE]]], [11 x [[TYPE]]]* [[scr2]], i32 0, i32 10 + // CHECK: [[ld:%.*]] = load [[TYPE]], [[TYPE]]* [[adr]], align [[ALN]] + // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ResHdl]], i32 [[ResIx]], i32 [[OFF10]], [[TYPE]] [[ld]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+void bittwiddlers(inout VTYPE things[13]) { + // INT: [[ValIx:%.*]] = add i32 [[InIx1]], 6 + // INT: [[InHdl:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[Bits]] + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], i8 1, i32 [[ALN]]) + // INT: [[val1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], i8 1, i32 [[ALN]]) + // INT: [[val2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], i8 1, i32 [[ALN]]) + // INT: [[val3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], i8 1, i32 [[ALN]]) + // INT: [[val4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], i8 1, i32 [[ALN]]) + // INT: [[val5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], i8 1, i32 [[ALN]]) + // INT: [[val6:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], i8 1, i32 [[ALN]]) + // INT: [[val7:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 
[[OFF8]], i8 1, i32 [[ALN]]) + // INT: [[val8:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], i8 1, i32 [[ALN]]) + // INT: [[val9:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], i8 1, i32 [[ALN]]) + // INT: [[val10:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], i8 1, i32 [[ALN]]) + // INT: [[val11:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + // INT: [[ld:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], i8 1, i32 [[ALN]]) + // INT: [[val12:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[ld]], 0 + + // INT: [[res0:%[0-9]*]] = xor [[TYPE]] [[val1]], -1 + things[0] = ~things[1]; + + // INT: [[res1:%[0-9]*]] = or [[TYPE]] [[val3]], [[val2]] + things[1] = things[2] | things[3]; + + // INT: [[res2:%[0-9]*]] = and [[TYPE]] [[val4]], [[val3]] + things[2] = things[3] & things[4]; + + // INT: [[res3:%[0-9]*]] = xor [[TYPE]] [[val5]], [[val4]] + things[3] = things[4] ^ things[5]; + + // INT: [[shv6:%[0-9]*]] = and [[TYPE]] [[val6]] + // INT: [[res4:%[0-9]*]] = shl [[TYPE]] [[val5]], [[shv6]] + things[4] = things[5] << things[6]; + + // INT: [[shv7:%[0-9]*]] = and [[TYPE]] [[val7]] + // UNSIG: [[res5:%[0-9]*]] = lshr [[TYPE]] [[val6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr [[TYPE]] [[val6]], [[shv7]] + things[5] = things[6] >> things[7]; + + // INT: [[res6:%[0-9]*]] = or [[TYPE]] [[val8]], [[val6]] + things[6] |= things[8]; + + // INT: [[res7:%[0-9]*]] = and [[TYPE]] [[val9]], [[val7]] + things[7] &= things[9]; + + // INT: 
[[res8:%[0-9]*]] = xor [[TYPE]] [[val10]], [[val8]] + things[8] ^= things[10]; + + // INT: [[shv11:%[0-9]*]] = and [[TYPE]] [[val11]] + // INT: [[res9:%[0-9]*]] = shl [[TYPE]] [[val9]], [[shv11]] + things[9] <<= things[11]; + + // INT: [[shv12:%[0-9]*]] = and [[TYPE]] [[val12]] + // UNSIG: [[res10:%[0-9]*]] = lshr [[TYPE]] [[val10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr [[TYPE]] [[val10]], [[shv12]] + things[10] >>= things[12]; + + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF0]], [[TYPE]] [[res0]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF1]], [[TYPE]] [[res1]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF2]], [[TYPE]] [[res2]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF3]], [[TYPE]] [[res3]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF4]], [[TYPE]] [[res4]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF5]], [[TYPE]] [[res5]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF6]], [[TYPE]] [[res6]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF7]], [[TYPE]] [[res7]], [[TYPE]] undef, 
[[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF8]], [[TYPE]] [[res8]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF9]], [[TYPE]] [[res9]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF10]], [[TYPE]] [[res10]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF11]], [[TYPE]] [[val11]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + // INT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[InHdl]], i32 [[ValIx]], i32 [[OFF12]], [[TYPE]] [[val12]], [[TYPE]] undef, [[TYPE]] undef, [[TYPE]] undef, i8 1, i32 [[ALN]]) + + // CHECK-LABEL: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl new file mode 100644 index 0000000000..44c9be17d4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-vec1s.hlsl @@ -0,0 +1,451 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DINT 
-enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Test relevant operators on vec1s in 6.9 to ensure they continue to be treated as scalars. + +#define VTYPE vector + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[ELTY:[a-z0-9_]*]] +// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:.*]] } +RWStructuredBuffer buf; + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout VTYPE things[10]) { + + // CHECK: [[buf:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle {{%.*}}, i32 1, i32 0, i8 1, i32 {{8|4|2}}) + // CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[buf]], 0 + // CHECK: [[res0:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[val0]], i64 0 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[add1:%.*]] = [[ADD:f?add( fast)?]] [[ELTY]] [[val1]], [[val5]] + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add1]], i32 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x 
[[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[sub2:%.*]] = [[SUB:f?sub( fast)?]] [[ELTY]] [[val2]], [[val6]] + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub2]], i32 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[mul3:%.*]] = [[MUL:f?mul( fast)?]] [[ELTY]] [[val3]], [[val7]] + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul3]], i32 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[div4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[ELTY]] [[val4]], [[val8]] + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div4]], i32 0 + // CHECK: store [[TYPE]] 
[[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // NODBL: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]] + // NODBL: [[rem5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[ELTY]] [[val5]], [[val9]] + // NODBL: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem5]], i32 0 + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export VTYPE arithmetic(inout VTYPE things[11])[11] { + VTYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[res0]], i32 0 + // CHECK: [[sub1:%.*]] = [[SUB]] [[ELTY]] {{-?(0|0\.?0*e?\+?0*|0xH8000)}}, [[val0]] + res[0] = +things[0]; + res[1] = -things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[add2:%.*]] = [[ADD]] [[ELTY]] [[val2]], [[val1]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[sub3:%.*]] = [[SUB]] [[ELTY]] [[val2]], [[val3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr4:%.*]] = 
getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[mul4:%.*]] = [[MUL]] [[ELTY]] [[val4]], [[val3]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[div5:%.*]] = [[DIV]] [[ELTY]] [[val4]], [[val5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]] + // NODBL: [[rem6:%.*]] = [[REM]] [[ELTY]] [[val5]], [[val6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[val7:%.*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // CHECK: [[add7:%.*]] = [[ADD]] [[ELTY]] [[val7]], [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add7]], i32 0 + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[val8:%.*]] = extractelement [[TYPE]] [[ld8]], i32 0 + // CHECK: [[add8:%.*]] = [[ADD]] [[ELTY]] [[val8]], [[NEG1:(-1|-1\.0*e\+0*|0xHBC00)]] + // CHECK: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add8]], i32 0 + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 
x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[val9:%.*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // CHECK: [[add9:%.*]] = [[ADD]] [[ELTY]] [[val9]], [[POS1]] + // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i32 0 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[val10:%.*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // CHECK: [[add10:%.*]] = [[ADD]] [[ELTY]] [[val10]], [[NEG1]] + // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i32 0 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 1 + // CHECK: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub1]], i64 0 + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 2 + // CHECK: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add2]], i64 0 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 3 + // CHECK: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[sub3]], i64 0 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 4 + // CHECK: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[mul4]], i64 0 + // CHECK: store 
[[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 5 + // CHECK: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[div5]], i64 0 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 6 + // NODBL: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[rem6]], i64 0 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 7 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[ld7]], [[TYPE]]* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 8 + // This is a post op, so the original value goes into res[]. + // CHECK: store [[TYPE]] [[ld8]], [[TYPE]]* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 9 + // CHECK: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add9]], i64 0 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %agg.result, i32 0, i32 10 + // CHECK: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[add10]], i64 0 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissible in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], VTYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[val0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[res0:%.*]] = xor i32 [[val0]], 1 + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[val1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[val2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[res1:%.*]] = or i32 [[val2]], [[val1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[bval2:%.*]] = icmp ne i32 [[val2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[val3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bval3:%.*]] = icmp ne i32 [[val3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bval2]], [[bval3]] + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[val4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bval4:%.*]] = icmp ne i32 [[val4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[val5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bval5:%.*]] = icmp ne i32 [[val5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bval3]], i1 [[bval4]], i1 [[bval5]] + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + res[3] = truth[3] ?
truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[val0:%.*]] = extractelement [[TYPE]] [[ld0]], i32 0 + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[val1:%.*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[ELTY]] [[val0]], [[val1]] + // CHECK: [[res4:%.*]] = zext i1 [[cmp4]] to i32 + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[val2:%.*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // CHECK: [[cmp5:%.*]] = [[CMP]] {{u?}}ne [[ELTY]] [[val1]], [[val2]] + // CHECK: [[res5:%.*]] = zext i1 [[cmp5]] to i32 + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[val3:%.*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[ELTY]] [[val2]], [[val3]] + // CHECK: [[res6:%.*]] = zext i1 [[cmp6]] to i32 + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[val4:%.*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[ELTY]] [[val3]], [[val4]] + // CHECK: [[res7:%.*]] = zext i1 [[cmp7]] to i32 + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x
[[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[val5:%.*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[ELTY]] [[val4]], [[val5]] + // CHECK: [[res8:%.*]] = zext i1 [[cmp8]] to i32 + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[val6:%.*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[ELTY]] [[val5]], [[val6]] + // CHECK: [[res9:%.*]] = zext i1 [[cmp9]] to i32 + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + // CHECK: store i32 [[res0]], i32* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + // CHECK: store i32 [[res1]], i32* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + // CHECK: store i32 [[res6]], i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + // CHECK: store i32 [[res7]], i32* [[adr7]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]*
%agg.result, i32 0, i32 8 + // CHECK: store i32 [[res8]], i32* [[adr8]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + // CHECK: store i32 [[res9]], i32* [[adr9]] + + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export VTYPE index(VTYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[ELTY]]] + VTYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 0 + // CHECK: store [[ELTY]] {{(0|0*\.?0*e?\+?0*|0xH0000)}}, [[ELTY]]* [[res0]] + res[0] = 0; + + // CHECK: [[adri:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 %i + // CHECK: store [[ELTY]] [[POS1]], [[ELTY]]* [[adri]] + res[i] = 1; + + // CHECK: [[adr2:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 2 + // CHECK: store [[ELTY]] {{(2|2\.?0*e?\+?0*|0xH4000)}}, [[ELTY]]* [[adr2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[ld0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr3:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%.*]] = extractelement [[TYPE]] [[ld0]], i64 0 + // CHECK: store [[ELTY]] [[thg0]], [[ELTY]]* [[adr3]] + res[3] = things[0]; + + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 %i + // CHECK: [[ldi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[adr4:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%.*]] = extractelement [[TYPE]] [[ldi]], i64 0 + // CHECK: store [[ELTY]] [[thgi]], [[ELTY]]* [[adr4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load [[TYPE]], 
[[TYPE]]* [[adr2]] + // CHECK: [[adr5:%.*]] = getelementptr [10 x [[ELTY]]], [10 x [[ELTY]]]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%.*]] = extractelement [[TYPE]] [[ld2]], i64 0 + // CHECK: store [[ELTY]] [[thg2]], [[ELTY]]* [[adr5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +#ifdef INT +// Test bit twiddling operators. +// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout VTYPE things[13]) { + // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 1 + // INT: [[ld1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // INT: [[val1:%[0-9]*]] = extractelement [[TYPE]] [[ld1]], i32 0 + // INT: [[xor1:%[0-9]*]] = xor [[ELTY]] [[val1]], -1 + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor1]], i32 0 + // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr0]] + things[0] = ~things[1]; + + // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 2 + // INT: [[ld2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // INT: [[val2:%[0-9]*]] = extractelement [[TYPE]] [[ld2]], i32 0 + // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 3 + // INT: [[ld3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // INT: [[val3:%[0-9]*]] = extractelement [[TYPE]] [[ld3]], i32 0 + // INT: [[or1:%[0-9]*]] = or [[ELTY]] [[val3]], [[val2]] + // INT: [[res1:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or1]], i32 0 + // INT: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] = things[2] | things[3]; + + // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 4 + // INT: [[ld4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // INT: [[val4:%[0-9]*]] = extractelement [[TYPE]] [[ld4]], i32 0 + // INT: [[and2:%[0-9]*]] = and [[ELTY]] 
[[val4]], [[val3]] + // INT: [[res2:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and2]], i32 0 + // INT: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] = things[3] & things[4]; + + // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 5 + // INT: [[ld5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // INT: [[val5:%[0-9]*]] = extractelement [[TYPE]] [[ld5]], i32 0 + // INT: [[xor3:%[0-9]*]] = xor [[ELTY]] [[val5]], [[val4]] + // INT: [[res3:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor3]], i32 0 + // INT: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] = things[4] ^ things[5]; + + // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 6 + // INT: [[ld6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // INT: [[val6:%[0-9]*]] = extractelement [[TYPE]] [[ld6]], i32 0 + // INT: [[shv6:%[0-9]*]] = and [[ELTY]] [[val6]] + // INT: [[shl4:%[0-9]*]] = shl [[ELTY]] [[val5]], [[shv6]] + // INT: [[res4:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl4]], i32 0 + // INT: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] = things[5] << things[6]; + + // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 7 + // INT: [[ld7:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // INT: [[val7:%[0-9]*]] = extractelement [[TYPE]] [[ld7]], i32 0 + // INT: [[shv7:%[0-9]*]] = and [[ELTY]] [[val7]] + // UNSIG: [[shr5:%[0-9]*]] = lshr [[ELTY]] [[val6]], [[shv7]] + // SIG: [[shr5:%[0-9]*]] = ashr [[ELTY]] [[val6]], [[shv7]] + // INT: [[res5:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr5]], i32 0 + // INT: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] = things[6] >> things[7]; + + // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 8 + // INT: [[ld8:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // INT: [[val8:%[0-9]*]] = extractelement [[TYPE]] 
[[ld8]], i32 0 + // INT: [[or6:%[0-9]*]] = or [[ELTY]] [[val8]], [[val6]] + // INT: [[res6:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[or6]], i32 0 + // INT: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // INT: [[val9:%[0-9]*]] = extractelement [[TYPE]] [[ld9]], i32 0 + // INT: [[and7:%[0-9]*]] = and [[ELTY]] [[val9]], [[val7]] + // INT: [[res7:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[and7]], i32 0 + // INT: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 10 + // INT: [[ld10:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // INT: [[val10:%[0-9]*]] = extractelement [[TYPE]] [[ld10]], i32 0 + // INT: [[xor8:%[0-9]*]] = xor [[ELTY]] [[val10]], [[val8]] + // INT: [[res8:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[xor8]], i32 0 + // INT: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr11]] + // INT: [[val11:%[0-9]*]] = extractelement [[TYPE]] [[ld11]], i32 0 + // INT: [[shv11:%[0-9]*]] = and [[ELTY]] [[val11]] + // INT: [[shl9:%[0-9]*]] = shl [[ELTY]] [[val9]], [[shv11]] + // INT: [[res9:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shl9]], i32 0 + // INT: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x [[TYPE]]], [13 x [[TYPE]]]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[adr12]] + // INT: [[val12:%[0-9]*]] = extractelement [[TYPE]] [[ld12]], i32 0 + // INT: [[shv12:%[0-9]*]] = and [[ELTY]] [[val12]] + // UNSIG: 
[[shr10:%[0-9]*]] = lshr [[ELTY]] [[val10]], [[shv12]] + // SIG: [[shr10:%[0-9]*]] = ashr [[ELTY]] [[val10]], [[shv12]] + // INT: [[res10:%.*]] = insertelement [[TYPE]] undef, [[ELTY]] [[shr10]], i32 0 + // INT: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl new file mode 100644 index 0000000000..ba76eca619 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -0,0 +1,563 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=8 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=10 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=11 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=12 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=13 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=14 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float 
-DNUM=15 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=16 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=18 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=128 %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Less exhaustive testing for some other types. +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=2 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint -DNUM=5 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 -DINT %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,UNSIG +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=177 -DINT -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL,INT,SIG + +// Test relevant operators on an assortment vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. 
+// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] + +RWStructuredBuffer< TYPE > buf; + +export void assignments(inout vector things[10], TYPE scales[10]); +export vector arithmetic(inout vector things[11])[11]; +export vector scarithmetic(inout vector things[10], TYPE scales[10])[10]; +export vector logic(vector truth[10], vector consequences[10])[10]; +export vector index(vector things[10], int i, TYPE val)[10]; + +struct Interface { + vector assigned[10]; + vector arithmeticked[11]; + vector scarithmeticked[10]; + vector logicked[10]; + vector indexed[10]; + TYPE scales[10]; +}; + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], TYPE scales[10]) { + + // Another trick to capture the size. + // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.[[TY]] [[res]], 0 + TYPE scalar = buf.Load(NUM); + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + things[0] = scalar; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: 
[[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[res1:%[0-9]*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] += things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] -= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] *= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x 
[[TYPE]]>* [[add4]] + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] /= things[8]; + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]] +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = things[9]; + vector f5 = things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] += scales[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]] + // CHECK: [[spt:%[0-9]*]] 
= insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]] + things[7] -= scales[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]] + things[8] *= scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + things[9] /= scales[4]; + +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[11])[11] { + vector res[11]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[res1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] 
= things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] +#ifdef DBL + // DBL can't use remainder operator, do something anyway to keep the rest consistent. + // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]] + res[7] = things[7]++; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]] + res[8] = things[8]--; + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]] + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + res[9] = ++things[9]; + + // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + res[10] = --things[10]; + + // Stores into res[]. Previous were for things[] inout. + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store 
<[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // These two were post ops, so the original value goes into res[]. + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 10 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
+// CHECK-LABEL: define void @"\01?scarithmetic +export vector scarithmetic(inout vector things[10], TYPE scales[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: 
[[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: 
[[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. +// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector logic(vector truth[10], vector consequences[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* 
%truth, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres3:%[0-9]*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to 
<[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr 
inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i, TYPE val)[10] { + vector res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|0xH3C00).*}}>, <[[NUM]] x [[TYPE]]>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|0xH4000).*}}>, <[[NUM]] x [[TYPE]]>* [[res2]] + res[Ix] = 2; + + // CHECK: [[add0:%[0-9]*]] 
= getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 %i + // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[addi]] + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +#ifdef INT +// Test bit twiddling operators. 
+// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[13]) { + // INT: [[adr1:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // INT: [[ld1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // INT: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld1]], <[[TYPE]] -1 + // INT: [[adr0:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = ~things[1]; + + // INT: [[adr2:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // INT: [[ld2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // INT: [[adr3:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // INT: [[ld3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // INT: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld3]], [[ld2]] + // INT: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] = things[2] | things[3]; + + // INT: [[adr4:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // INT: [[ld4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // INT: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld4]], [[ld3]] + // INT: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] = things[3] & things[4]; + + // INT: [[adr5:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // INT: [[ld5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // INT: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld4]], [[ld5]] + // INT: store <[[NUM]] x [[TYPE]]> [[res3]], 
<[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // INT: [[adr6:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // INT: [[ld6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // INT: [[shv6:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld6]] + // INT: [[res4:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld5]], [[shv6]] + // INT: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] = things[5] << things[6]; + + // INT: [[adr7:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // INT: [[ld7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // INT: [[shv7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld7]] + // UNSIG: [[res5:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]] + // SIG: [[res5:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld6]], [[shv7]] + // INT: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] = things[6] >> things[7]; + + // INT: [[adr8:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // INT: [[ld8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // INT: [[res6:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[ld8]], [[ld6]] + // INT: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + things[6] |= things[8]; + + // INT: [[adr9:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // INT: [[ld9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // INT: [[res7:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld9]], [[ld7]] + // INT: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + things[7] &= things[9]; + + // INT: [[adr10:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 
0, i32 10 + // INT: [[ld10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // INT: [[res8:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[ld8]], [[ld10]] + // INT: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + things[8] ^= things[10]; + + // INT: [[adr11:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 11 + // INT: [[ld11:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr11]] + // INT: [[shv11:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld11]] + // INT: [[res9:%[0-9]*]] = shl <[[NUM]] x [[TYPE]]> [[ld9]], [[shv11]] + // INT: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + things[9] <<= things[11]; + + // INT: [[adr12:%[0-9]*]] = getelementptr inbounds [13 x <[[NUM]] x [[TYPE]]>], [13 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 12 + // INT: [[ld12:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr12]] + // INT: [[shv12:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[ld12]] + // UNSIG: [[res10:%[0-9]*]] = lshr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // SIG: [[res10:%[0-9]*]] = ashr <[[NUM]] x [[TYPE]]> [[ld10]], [[shv12]] + // INT: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + things[10] >>= things[12]; + + // INT: ret void +} +#endif // INT diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..2ae3c92e85 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-scalarized-intrinsics.hlsl @@ -0,0 +1,115 @@ +// RUN: %dxc -T lib_6_9 %s | FileCheck %s + +// Long vector tests for vec ops that scalarize to something more complex +// than a simple repetition of the same dx.op calls. 
+ +// CHECK-LABEL: test_atan2 +// CHECK: fdiv fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> %{{.*}}) ; Atan(value) +// CHECK: fadd fast <8 x float> %{{.*}}, %{{.*}}, +// CHECK: fcmp fast oeq <8 x float> +// CHECK: fcmp fast oge <8 x float> +// CHECK: fcmp fast olt <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: and <8 x i1> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> +// CHECK: select <8 x i1> %{{.*}}, <8 x float> vec1, vector vec2) { + vec1 = atan2(vec1, vec2); +} + +// CHECK-LABEL: test_fmod +// CHECK: fdiv fast <8 x float> +// CHECK: fsub fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 6, <8 x float> %{{.*}}) ; FAbs(value) +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 22, <8 x float> %{{.*}}) ; Frc(value) + +// CHECK: fsub fast <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> +// CHECK: fmul fast <8 x float> +export void test_fmod(inout vector vec1, vector vec2) { + vec1 = fmod(vec1, vec2); +} + +// CHECK-LABEL: test_ldexp +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +// CHECK: fmul fast <8 x float> + +export void test_ldexp(inout vector vec1, vector vec2) { + vec1 = ldexp(vec1, vec2); +} + + +// CHECK-LABEL: test_pow +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> %{{.*}}) ; Log(value) +// CHECK: fmul fast <8 x float> +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> %{{.*}}) ; Exp(value) +export void test_pow(inout vector vec1, vector vec2) { + vec1 = pow(vec1, vec2); +} + +// CHECK-LABEL: test_modf +// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 29, <8 x float> %{{.*}}) ; Round_z(value) +// CHECK: fsub fast <8 x float> +export void test_modf(inout vector vec1, vector vec2) { + vec1 = modf(vec1, vec2); +} + +// CHECK-LABEL: test_dot +// CHECK: 
[[el:%.*]] = extractelement <8 x float> +// CHECK: [[mul:%.*]] = fmul fast float [[el]] +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +// CHECK: [[pong:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[ping]]) ; FMad(a,b,c) +// CHECK: [[ping:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[pong]]) ; FMad(a,b,c) +export void test_dot(inout vector vec1, vector vec2) { + vec1 = dot(vec1, vec2); +} + +// CHECK-LABEL: test_any +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +// CHECK: or i1 +export void test_any(vector vec1, inout vector bvec) { + bvec &= any(vec1); +} + +// CHECK-LABEL: test_all +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +// CHECK: and i1 +export void test_all(vector vec1, inout vector bvec) { + bvec &= all(vec1); +} + +// CHECK-LABEL: test_WaveMatch +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +// call {{.*}} @dx.op.wave +export uint4 test_WaveMatch(vector bvec) { + return WaveMatch(bvec); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl new file mode 100644 index 0000000000..02cad5b894 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-float-intrinsics.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=35 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=36 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled binary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + + // Test simple matching type overloads. 
+ + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.binary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]]) + vector hRes = FUNC(hVec1, hVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.binary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]]) + vector fRes = FUNC(fVec1, fVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.binary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]]) + vector dRes = FUNC(dVec1, dVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl new file mode 100644 index 0000000000..994246b753 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-binary-int-intrinsics.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=max -DOP=37 -DUOP=39 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=min -DOP=38 -DUOP=40 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled binary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. 
+ +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 
= buf.Load >(1024); + vector usVec2 = buf.Load >(1536); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2048); + vector iVec2 = buf.Load >(2560); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3072); + vector uiVec2 = buf.Load >(3584); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4096); + vector lVec2 = buf.Load >(4608); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5120); + 
vector ulVec2 = buf.Load >(5632); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]]) + vector sRes = FUNC(sVec1, sVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]]) + vector usRes = FUNC(usVec1, usVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]]) + vector iRes = FUNC(iVec1, iVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]]) + vector uiRes = FUNC(uiVec1, uiVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]]) + vector lRes = FUNC(lVec1, lVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]]) + vector ulRes = FUNC(ulVec1, ulVec2); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl new file mode 100644 index 0000000000..6ebb511b00 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -0,0 +1,77 @@ +// 
The binary part of some of these is all just a vector math ops with as many unary dxops as elements. +// These will have apparent mismatches between the ARITY define and the check prefix. + +// RUN: %dxc -DFUNC=f16tof32 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=f32tof16 -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,LEGACY +// RUN: %dxc -DFUNC=isfinite -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isinf -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=isnan -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,SPECFLT +// RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=QuadReadAcrossDiagonal -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -DFUNC=WaveActiveBitAnd -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitOr -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveBitXor -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc 
-DFUNC=WaveActiveMin -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveMax -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitAnd -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitOr -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixBitXor -DARITY=5 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixProduct -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveMultiPrefixSum -DARITY=5 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixSum -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WavePrefixProduct -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveReadLaneFirst -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE +// RUN: %dxc -DFUNC=WaveActiveAllEqual -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,WAVE + +#ifndef TYPE +#define TYPE float +#endif + +#if ARITY == 1 +#define CALLARGS(x,y,z) x +#elif ARITY == 2 +#define CALLARGS(x,y,z) x, y +#elif ARITY == 3 +#define CALLARGS(x,y,z) x, y, z +// ARITY 4 is used for 1 vec + scalar +#elif ARITY == 4 +#define CALLARGS(x,y,z) x, i +// ARITY 5 is used for 1 vec + uint4 mask for wavemultiprefix* +#elif ARITY == 5 +#define CALLARGS(x,y,z) x, m +#endif + +StructuredBuffer< vector > buf; +ByteAddressBuffer rbuf; + +float4 main(uint i : SV_PrimitiveID, uint4 m : M) : SV_Target { + vector arg1 = rbuf.Load< vector >(i++*32); + vector arg2 = rbuf.Load< vector >(i++*32); + vector arg3 = rbuf.Load< vector >(i++*32); + + // UNARY: call {{.*}} [[DXOP:@dx.op.unary]] + // 
BINARY: call {{.*}} [[DXOP:@dx.op.binary]] + // TERTIARY: call {{.*}} [[DXOP:@dx.op.tertiary]] + // LEGACY: call {{.*}} [[DXOP:@dx.op.legacy]] + // SPECFLT: call {{.*}} [[DXOP:@dx.op.isSpecialFloat]] + // QUAD: call {{.*}} [[DXOP:@dx.op.quad]] + // WAVE: call {{.*}} [[DXOP:@dx.op.wave]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + // CHECK: call {{.*}} [[DXOP]] + + vector ret = FUNC(CALLARGS(arg1, arg2, arg3)); + return float4(ret[0] + ret[1], ret[2] + ret[3], ret[4] + ret[5], ret[6] + ret[7]); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl new file mode 100644 index 0000000000..e32ebc1db2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-float-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=46 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled ternary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +// Given that all we have at the moment are fmad and fma and the latter only takes doubles, +// fma is tacked on as an additional check. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> +// CHECK-DAG: %dx.types.ResRet.[[DTY:v[0-9]*f64]] = type { <[[NUM]] x double> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec1 = buf.Load >(0); + vector hVec2 = buf.Load >(512); + vector hVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec1 = buf.Load >(2048); + vector fVec2 = buf.Load >(2560); + vector fVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] 
@dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[DTY]] @dx.op.rawBufferVectorLoad.[[DTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.[[DTY]] [[ld]], 0 + vector dVec1 = buf.Load >(4096); + vector dVec2 = buf.Load >(4608); + vector dVec3 = buf.Load >(5120); + + // Test simple matching type overloads. + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.tertiary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec1]], <[[NUM]] x half> [[hvec2]], <[[NUM]] x half> [[hvec3]]) + vector hRes = FUNC(hVec1, hVec2, hVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.tertiary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec1]], <[[NUM]] x float> [[fvec2]], <[[NUM]] x float> [[fvec3]]) + vector fRes = FUNC(fVec1, fVec2, fVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 [[OP]], <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes = FUNC(dVec1, dVec2, dVec3); + + // Tacked on fma() check since it only takes doubles. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x double> @dx.op.tertiary.[[DTY]](i32 47, <[[NUM]] x double> [[dvec1]], <[[NUM]] x double> [[dvec2]], <[[NUM]] x double> [[dvec3]]) + vector dRes2 = fma(dVec1, dVec2, dVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(2048, fRes); + buf.Store >(4096, dRes); + buf.Store >(5120, dRes2); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl new file mode 100644 index 0000000000..50f98715e4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-tertiary-int-intrinsics.hlsl @@ -0,0 +1,131 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=mad -DOP=48 -DUOP=49 -DNUM=1022 %s | FileCheck %s + +#ifndef UOP +#define UOP OP +#endif + +// Test vector-enabled tertiary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode numbers. 
+ // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 888, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(888, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[UOP:[0-9]*]] + buf.Store(999, UOP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 512 + // CHECK: [[svec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[svec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec1 = buf.Load >(0); + vector sVec2 = buf.Load >(512); + vector sVec3 = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1025 + // CHECK: [[usvec1:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1536 + // CHECK: [[usvec2:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, 
%dx.types.Handle [[buf]], i32 2048 + // CHECK: [[usvec3:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec1 = buf.Load >(1025); + vector usVec2 = buf.Load >(1536); + vector usVec3 = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2049 + // CHECK: [[ivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2560 + // CHECK: [[ivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[ivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec1 = buf.Load >(2049); + vector iVec2 = buf.Load >(2560); + vector iVec3 = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3073 + // CHECK: [[uivec1:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3584 + // CHECK: [[uivec2:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[uivec3:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec1 = buf.Load >(3073); + vector uiVec2 = buf.Load >(3584); + vector uiVec3 = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4097 + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] 
@dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4608 + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[lvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec1 = buf.Load >(4097); + vector lVec2 = buf.Load >(4608); + vector lVec3 = buf.Load >(5120); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5121 + // CHECK: [[ulvec1:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5632 + // CHECK: [[ulvec2:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 6144 + // CHECK: [[ulvec3:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec1 = buf.Load >(5121); + vector ulVec2 = buf.Load >(5632); + vector ulVec3 = buf.Load >(6144); + + // Test simple matching type overloads. 
+ // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[svec2]], <[[NUM]] x i16> [[svec3]]) + vector sRes = FUNC(sVec1, sVec2, sVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.tertiary.[[STY]](i32 [[UOP]], <[[NUM]] x i16> [[usvec1]], <[[NUM]] x i16> [[usvec2]], <[[NUM]] x i16> [[usvec3]]) + vector usRes = FUNC(usVec1, usVec2, usVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[ivec2]], <[[NUM]] x i32> [[ivec3]]) + vector iRes = FUNC(iVec1, iVec2, iVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.tertiary.[[ITY]](i32 [[UOP]], <[[NUM]] x i32> [[uivec1]], <[[NUM]] x i32> [[uivec2]], <[[NUM]] x i32> [[uivec3]]) + vector uiRes = FUNC(uiVec1, uiVec2, uiVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[lvec2]], <[[NUM]] x i64> [[lvec3]]) + vector lRes = FUNC(lVec1, lVec2, lVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.tertiary.[[LTY]](i32 [[UOP]], <[[NUM]] x i64> [[ulvec1]], <[[NUM]] x i64> [[ulvec2]], <[[NUM]] x i64> [[ulvec3]]) + vector ulRes = FUNC(ulVec1, ulVec2, ulVec3); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl new file mode 100644 index 
0000000000..91ab631a7e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sin -DOP=13 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tan -DOP=14 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=acos -DOP=15 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=asin -DOP=16 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=atan -DOP=17 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cosh -DOP=18 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sinh -DOP=19 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=tanh -DOP=20 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 
-enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=exp2 -DOP=21 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=frac -DOP=22 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log2 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=log10 -DOP=23 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=sqrt -DOP=24 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=rsqrt -DOP=25 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=round -DOP=26 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=floor -DOP=27 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=ceil -DOP=28 -DNUM=1022 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=trunc -DOP=29 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take float-like parameters and +// and are "trivial" in that they can be implemented with a single call +// instruction with the same 
parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half> +// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float> + +[numthreads(8,1,1)] +void main() { + + // Capture opcode number. + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0 + vector hVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0 + vector fVec = buf.Load >(1024); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 [[OP]], <[[NUM]] x half> [[hvec]]) + vector hRes = FUNC(hVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]]) + vector fRes = FUNC(fVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, hRes); + buf.Store >(1024, fRes); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl new file mode 100644 index 0000000000..ef0b250745 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-int-intrinsics.hlsl @@ -0,0 +1,86 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=reversebits -DOP=30 -DNUM=1022 %s | FileCheck %s + +// Test vector-enabled unary intrinsics that take signed and unsigned integer parameters of +// different widths and are "trivial" in that they can be implemented with a single call +// instruction with the same parameter and return types. + +RWByteAddressBuffer buf; + +// CHECK-DAG: %dx.types.ResRet.[[STY:v[0-9]*i16]] = type { <[[NUM:[0-9]*]] x i16> +// CHECK-DAG: %dx.types.ResRet.[[ITY:v[0-9]*i32]] = type { <[[NUM]] x i32> +// CHECK-DAG: %dx.types.ResRet.[[LTY:v[0-9]*i64]] = type { <[[NUM]] x i64> + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // Capture opcode number. 
+ // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]] + buf.Store(999, OP); + + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK: [[svec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector sVec = buf.Load >(0); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[STY]] @dx.op.rawBufferVectorLoad.[[STY]](i32 303, %dx.types.Handle [[buf]], i32 1024 + // CHECK: [[usvec:%.*]] = extractvalue %dx.types.ResRet.[[STY]] [[ld]], 0 + vector usVec = buf.Load >(1024); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 2048 + // CHECK: [[ivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector iVec = buf.Load >(2048); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[ITY]] @dx.op.rawBufferVectorLoad.[[ITY]](i32 303, %dx.types.Handle [[buf]], i32 3072 + // CHECK: [[uivec:%.*]] = extractvalue %dx.types.ResRet.[[ITY]] [[ld]], 0 + vector uiVec = buf.Load >(3072); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 4096 + // CHECK: [[lvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector lVec = buf.Load >(4096); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[LTY]] @dx.op.rawBufferVectorLoad.[[LTY]](i32 303, %dx.types.Handle [[buf]], i32 5120 + // CHECK: [[ulvec:%.*]] = extractvalue %dx.types.ResRet.[[LTY]] [[ld]], 0 + vector ulVec = buf.Load >(5120); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[svec]]) + vector sRes = FUNC(sVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: 
insertelement + // CHECK: call <[[NUM]] x i16> @dx.op.unary.[[STY]](i32 [[OP]], <[[NUM]] x i16> [[usvec]]) + vector usRes = FUNC(usVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[ivec]]) + vector iRes = FUNC(iVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i32> @dx.op.unary.[[ITY]](i32 [[OP]], <[[NUM]] x i32> [[uivec]]) + vector uiRes = FUNC(uiVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[lvec]]) + vector lRes = FUNC(lVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: call <[[NUM]] x i64> @dx.op.unary.[[LTY]](i32 [[OP]], <[[NUM]] x i64> [[ulvec]]) + vector ulRes = FUNC(ulVec); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + buf.Store >(0, sRes); + buf.Store >(1024, usRes); + buf.Store >(2048, iRes); + buf.Store >(3072, uiRes); + buf.Store >(4096, lRes); + buf.Store >(5120, ulRes); +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll new file mode 100644 index 0000000000..987f997a2a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-dynvec2array.ll @@ -0,0 +1,269 @@ +; RUN: %dxopt %s -dynamic-vector-to-array,ReplaceAllVectors=0 -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } + +; Vec2s should be preserved. 
+; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 + +; Dynamic Vec1s should be reduced. +; CHECK-DAG: @dygar1.v = internal global [2 x [1 x float]] zeroinitializer, align 4 +; CHECK-DAG: @dygrec1.0.v = internal global [1 x float] zeroinitializer, align 4 +; CHECK-DAG: @dyglob1.v = internal global [1 x float] zeroinitializer, align 4 + +; These static accessed Vec1s were already reduced by SROA +; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4 +; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +@dyglob1 = internal global <1 x float> zeroinitializer, align 4 +@dyglob2 = internal global <2 x float> zeroinitializer, align 4 +@stglob2 = internal global <2 x float> zeroinitializer, align 4 +@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 +@stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 +@stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +@dygrec1.0 = internal global <1 x float> zeroinitializer, align 4 +@stglob1.0 = internal global float 0.000000e+00, align 4 +@stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +; Function Attrs: nounwind +; CHECK-LOCAL: define <4 x float> @"\01?tester +define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, 
[12 x float]* %vals) #0 { +bb: + ; Vec2s are preserved. + ; CHECK-DAG: %dyloc2 = alloca <2 x float> + ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + + ; CHECK-DAG: %stloc2 = alloca <2 x float> + ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %stlorc2.0 = alloca <2 x float> + + ; Statics vec1s are unaltered by dynamic vector to array. + ; CHECK-DAG: %stloc1 = alloca <1 x float> + ; CHECK-DAG: %stlar1.0 = alloca [3 x float] + ; CHECK-DAG: %stlorc1.0 = alloca <1 x float> + + ; Dynamic vec1s are removed and lose their names. + ; CHECK-DAG: alloca [1 x float] + ; CHECK-DAG: alloca [3 x [1 x float]] + ; CHECK-DAG: alloca [1 x float] + + %dylorc1.0 = alloca <1 x float> + %stlorc1.0 = alloca <1 x float> + %dylorc2.0 = alloca <2 x float> + %stlorc2.0 = alloca <2 x float> + %stlar1.0 = alloca [3 x float] + %tmp = alloca i32, align 4 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + store i32 %ix, i32* %tmp, align 4 + + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb76 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float 
%tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* %tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp32 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30, i32 %tmp31 ; line:56 col:20 + store float %tmp29, float* %tmp32 ; line:56 col:35 + %tmp33 = getelementptr inbounds [3 x float], [3 x float]* %stlar1.0, i32 0, i32 1 ; line:56 col:5 + store float %tmp29, float* %tmp33 ; line:56 col:18 + %tmp34 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp35 = load float, float* %tmp34, align 4 ; line:57 col:37 + %tmp36 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp37 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp38 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp36, i32 %tmp37 ; line:57 col:20 + store float %tmp35, float* %tmp38 ; line:57 col:35 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1, i32 0 ; line:57 col:5 + store float %tmp35, float* %tmp39 ; line:57 col:18 + %tmp40 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp41 = load float, float* %tmp40, align 4 ; line:58 col:36 + %tmp42 = load i32, i32* %tmp, align 4 ; line:58 
col:30 + %tmp43 = getelementptr inbounds <1 x float>, <1 x float>* %dylorc1.0, i32 0, i32 %tmp42 ; line:58 col:20 + store float %tmp41, float* %tmp43 ; line:58 col:34 + %tmp44 = getelementptr inbounds <1 x float>, <1 x float>* %stlorc1.0, i32 0, i32 0 ; line:58 col:5 + store float %tmp41, float* %tmp44 ; line:58 col:18 + %tmp45 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp46 = load float, float* %tmp45, align 4 ; line:59 col:36 + %tmp47 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp48 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp47 ; line:59 col:20 + store float %tmp46, float* %tmp48 ; line:59 col:34 + %tmp49 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:59 col:5 + store float %tmp46, float* %tmp49 ; line:59 col:18 + %tmp50 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp51 = load float, float* %tmp50, align 4 ; line:61 col:32 + %tmp52 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp53 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp52 ; line:61 col:18 + store float %tmp51, float* %tmp53 ; line:61 col:30 + store float %tmp51, float* @stglob1.0 ; line:61 col:16 + %tmp54 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp55 = load float, float* %tmp54, align 4 ; line:62 col:32 + %tmp56 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp57 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp56 ; line:62 col:18 + store float %tmp55, float* %tmp57 ; line:62 col:30 + store float %tmp55, float* getelementptr inbounds (<2 x float>, <2 x float>* @stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp59 = load float, float* %tmp58, align 4 ; line:63 col:37 + %tmp60 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp61 = 
load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp62 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp60, i32 %tmp61 ; line:63 col:20 + store float %tmp59, float* %tmp62 ; line:63 col:35 + store float %tmp59, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 1) ; line:63 col:18 + %tmp63 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp64 = load float, float* %tmp63, align 4 ; line:64 col:37 + %tmp65 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp66 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp67 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp65, i32 %tmp66 ; line:64 col:20 + store float %tmp64, float* %tmp67 ; line:64 col:35 + store float %tmp64, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp68 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp69 = load float, float* %tmp68, align 4 ; line:65 col:36 + %tmp70 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp71 = getelementptr inbounds <1 x float>, <1 x float>* @dygrec1.0, i32 0, i32 %tmp70 ; line:65 col:20 + store float %tmp69, float* %tmp71 ; line:65 col:34 + store float %tmp69, float* @stgrec1.0.0 ; line:65 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp73 = load float, float* %tmp72, align 4 ; line:66 col:36 + %tmp74 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp75 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp74 ; line:66 col:20 + store float %tmp73, float* %tmp75 ; line:66 col:34 + store float %tmp73, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:66 col:18 + br label %bb76 ; line:67 col:3 + +bb76: ; preds = %bb17, %bb + %tmp77 = load <1 x float>, <1 x float>* %dyloc1, 
align 4 ; line:68 col:17 + %tmp78 = extractelement <1 x float> %tmp77, i32 0 ; line:68 col:17 + %tmp79 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp80 = extractelement <2 x float> %tmp79, i32 1 ; line:68 col:27 + %tmp81 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp82 = extractelement <1 x float> %tmp81, i32 0 ; line:68 col:37 + %tmp83 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp84 = extractelement <2 x float> %tmp83, i32 1 ; line:68 col:47 + %tmp85 = insertelement <4 x float> undef, float %tmp78, i64 0 ; line:68 col:16 + %tmp86 = insertelement <4 x float> %tmp85, float %tmp80, i64 1 ; line:68 col:16 + %tmp87 = insertelement <4 x float> %tmp86, float %tmp82, i64 2 ; line:68 col:16 + %tmp88 = insertelement <4 x float> %tmp87, float %tmp84, i64 3 ; line:68 col:16 + %tmp89 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp90 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp91 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp89, i32 %tmp90 ; line:68 col:66 + %tmp92 = load float, float* %tmp91 ; line:68 col:66 + %tmp93 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp94 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp95 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp93, i32 %tmp94 ; line:68 col:82 + %tmp96 = load float, float* %tmp95 ; line:68 col:82 + %tmp97 = getelementptr [3 x float], [3 x float]* %stlar1.0, i32 0, i32 0 ; line:68 col:98 + %load = load float, float* %tmp97 ; line:68 col:98 + %insert = insertelement <1 x float> undef, float %load, i64 0 ; line:68 col:98 + %tmp98 = extractelement <1 x float> %insert, i32 0 ; line:68 col:98 + %tmp99 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp100 = load <2 x float>, <2 x float>* %tmp99, align 4 ; line:68 col:111 + %tmp101 = extractelement <2 x float> %tmp100, i32 1 ; line:68 
col:111 + %tmp102 = insertelement <4 x float> undef, float %tmp92, i64 0 ; line:68 col:65 + %tmp103 = insertelement <4 x float> %tmp102, float %tmp96, i64 1 ; line:68 col:65 + %tmp104 = insertelement <4 x float> %tmp103, float %tmp98, i64 2 ; line:68 col:65 + %tmp105 = insertelement <4 x float> %tmp104, float %tmp101, i64 3 ; line:68 col:65 + %tmp106 = fadd <4 x float> %tmp88, %tmp105 ; line:68 col:57 + %tmp107 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp108 = extractelement <1 x float> %tmp107, i32 0 ; line:69 col:10 + %tmp109 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp110 = extractelement <2 x float> %tmp109, i32 1 ; line:69 col:21 + %load3 = load float, float* @stglob1.0 ; line:69 col:32 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 ; line:69 col:32 + %tmp111 = extractelement <1 x float> %insert4, i32 0 ; line:69 col:32 + %tmp112 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp113 = extractelement <2 x float> %tmp112, i32 1 ; line:69 col:43 + %tmp114 = insertelement <4 x float> undef, float %tmp108, i64 0 ; line:69 col:9 + %tmp115 = insertelement <4 x float> %tmp114, float %tmp110, i64 1 ; line:69 col:9 + %tmp116 = insertelement <4 x float> %tmp115, float %tmp111, i64 2 ; line:69 col:9 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp113, i64 3 ; line:69 col:9 + %tmp118 = fadd <4 x float> %tmp106, %tmp117 ; line:68 col:124 + %tmp119 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp120 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp121 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp119, i32 %tmp120 ; line:69 col:63 + %tmp122 = load float, float* %tmp121 ; line:69 col:63 + %tmp123 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp124 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp125 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp123, i32 %tmp124 ; 
line:69 col:79 + %tmp126 = load float, float* %tmp125 ; line:69 col:79 + %load1 = load float, float* getelementptr inbounds ([2 x float], [2 x float]* @stgar1.0, i32 0, i32 0) ; line:69 col:95 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 ; line:69 col:95 + %tmp127 = extractelement <1 x float> %insert2, i32 0 ; line:69 col:95 + %tmp128 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp129 = extractelement <2 x float> %tmp128, i32 1 ; line:69 col:108 + %tmp130 = insertelement <4 x float> undef, float %tmp122, i64 0 ; line:69 col:62 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp126, i64 1 ; line:69 col:62 + %tmp132 = insertelement <4 x float> %tmp131, float %tmp127, i64 2 ; line:69 col:62 + %tmp133 = insertelement <4 x float> %tmp132, float %tmp129, i64 3 ; line:69 col:62 + %tmp134 = fadd <4 x float> %tmp118, %tmp133 ; line:69 col:54 + %tmp135 = load <1 x float>, <1 x float>* %stlorc1.0, align 4 ; line:70 col:20 + %tmp136 = extractelement <1 x float> %tmp135, i64 0 ; line:70 col:11 + %tmp137 = getelementptr inbounds <2 x float>, <2 x float>* %stlorc2.0, i32 0, i32 1 ; line:70 col:23 + %tmp138 = load float, float* %tmp137 ; line:70 col:23 + %tmp139 = load <1 x float>, <1 x float>* %dylorc1.0, align 4 ; line:70 col:45 + %tmp140 = extractelement <1 x float> %tmp139, i64 0 ; line:70 col:11 + %tmp141 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp142 = getelementptr inbounds <2 x float>, <2 x float>* %dylorc2.0, i32 0, i32 %tmp141 ; line:70 col:48 + %tmp143 = load float, float* %tmp142 ; line:70 col:48 + %tmp144 = insertelement <4 x float> undef, float %tmp136, i64 0 ; line:70 col:11 + %tmp145 = insertelement <4 x float> %tmp144, float %tmp138, i64 1 ; line:70 col:11 + %tmp146 = insertelement <4 x float> %tmp145, float %tmp140, i64 2 ; line:70 col:11 + %tmp147 = insertelement <4 x float> %tmp146, float %tmp143, i64 3 ; line:70 col:11 + 
%tmp148 = fadd <4 x float> %tmp134, %tmp147 ; line:69 col:121 + %load5 = load float, float* @stgrec1.0.0 ; line:70 col:80 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 ; line:70 col:80 + %tmp149 = extractelement <1 x float> %insert6, i64 0 ; line:70 col:71 + %tmp150 = load float, float* getelementptr inbounds (<2 x float>, <2 x float>* @stgrec2.0, i32 0, i32 1) ; line:70 col:83 + %tmp151 = load <1 x float>, <1 x float>* @dygrec1.0, align 4 ; line:70 col:105 + %tmp152 = extractelement <1 x float> %tmp151, i64 0 ; line:70 col:71 + %tmp153 = load i32, i32* %tmp, align 4 ; line:70 col:118 + %tmp154 = getelementptr inbounds <2 x float>, <2 x float>* @dygrec2.0, i32 0, i32 %tmp153 ; line:70 col:108 + %tmp155 = load float, float* %tmp154 ; line:70 col:108 + %tmp156 = insertelement <4 x float> undef, float %tmp149, i64 0 ; line:70 col:71 + %tmp157 = insertelement <4 x float> %tmp156, float %tmp150, i64 1 ; line:70 col:71 + %tmp158 = insertelement <4 x float> %tmp157, float %tmp152, i64 2 ; line:70 col:71 + %tmp159 = insertelement <4 x float> %tmp158, float %tmp155, i64 3 ; line:70 col:71 + %tmp160 = fadd <4 x float> %tmp148, %tmp159 ; line:70 col:63 + ret <4 x float> %tmp160 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!dx.version = !{!3} +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll new file mode 100644 index 0000000000..95a64a17d4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv-sroa.ll @@ -0,0 +1,324 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Test for SROA reduction of globals and allocas. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.VectRec1 = type { <1 x float> } +%struct.VectRec2 = type { <2 x float> } +%ConstantBuffer = type opaque + +; Confirm that the dynamic globals are untouched and the statics are scalarized. +; DAG used to preserve the convenient ordering. + +; Dynamic access preserves even vec1s in SROA. +; CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec1.0 = internal global <1 x float> zeroinitializer, align 4 +; CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @dygrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Having >1 elements preserves even statically-accessed vec2s. +; CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +; CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +; CHECK-DAG: @stgrec2.0 = internal global <2 x float> zeroinitializer, align 4 + +; Statically-accessed vec1s should get scalarized. 
+; CHECK-DAG: @stgar1.0 = internal global [2 x float] zeroinitializer, align 4 +; CHECK-DAG: @stglob1.0 = internal global float 0.000000e+00, align 4 +; CHECK-DAG: @stgrec1.0.0 = internal global float 0.000000e+00, align 4 + +@dyglob2 = internal global <2 x float> zeroinitializer, align 4 +@dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 +@dyglob1 = internal global <1 x float> zeroinitializer, align 4 +@dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@stglob2 = internal global <2 x float> zeroinitializer, align 4 +@stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +@stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +@stglob1 = internal global <1 x float> zeroinitializer, align 4 +@stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +@stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define <4 x float> @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z"(i32 %ix, [12 x float]* %vals) #0 { +bb: + ; Dynamic access preserves even vec1s in SROA. + ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylorc1.0 = alloca <1 x float> + ; CHECK-DAG: %dylorc2.0 = alloca <2 x float> + ; CHECK-DAG: %dylar1 = alloca [3 x <1 x float>] + ; CHECK-DAG: %dylar2 = alloca [4 x <2 x float>] + + ; SROA doesn't reduce non-array allocas because scalarizer should get them. + ; CHECK-DAG: %stlorc1.0 = alloca <1 x float> + ; CHECK-DAG: %stlorc2.0 = alloca <2 x float> + ; CHECK-DAG: %stloc1 = alloca <1 x float>, align 4 + ; CHECK-DAG: %stloc2 = alloca <2 x float>, align 4 + + ; Statically-accessed arrays should get reduced. 
+ ; CHECK-DAG: %stlar2 = alloca [4 x <2 x float>] + ; CHECK-DAG: %stlar1.0 = alloca [3 x float] + + %tmp = alloca i32, align 4, !dx.temp !14 + %dyloc1 = alloca <1 x float>, align 4 + %dyloc2 = alloca <2 x float>, align 4 + %dylar1 = alloca [3 x <1 x float>], align 4 + %dylar2 = alloca [4 x <2 x float>], align 4 + %dylorc1 = alloca %struct.VectRec1, align 4 + %dylorc2 = alloca %struct.VectRec2, align 4 + %stloc1 = alloca <1 x float>, align 4 + %stloc2 = alloca <2 x float>, align 4 + %stlar1 = alloca [3 x <1 x float>], align 4 + %stlar2 = alloca [4 x <2 x float>], align 4 + %stlorc1 = alloca %struct.VectRec1, align 4 + %stlorc2 = alloca %struct.VectRec2, align 4 + + store i32 %ix, i32* %tmp, align 4 + %tmp13 = load i32, i32* %tmp, align 4 ; line:53 col:7 + %tmp14 = icmp sgt i32 %tmp13, 0 ; line:53 col:10 + %tmp15 = icmp ne i1 %tmp14, false ; line:53 col:10 + %tmp16 = icmp ne i1 %tmp15, false ; line:53 col:10 + br i1 %tmp16, label %bb17, label %bb86 ; line:53 col:7 + +bb17: ; preds = %bb + %tmp18 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 0 ; line:54 col:30 + %tmp19 = load float, float* %tmp18, align 4 ; line:54 col:30 + %tmp20 = load i32, i32* %tmp, align 4 ; line:54 col:24 + %tmp21 = getelementptr <1 x float>, <1 x float>* %dyloc1, i32 0, i32 %tmp20 ; line:54 col:17 + store float %tmp19, float* %tmp21 ; line:54 col:28 + %tmp22 = getelementptr <1 x float>, <1 x float>* %stloc1, i32 0, i32 0 ; line:54 col:5 + store float %tmp19, float* %tmp22 ; line:54 col:15 + %tmp23 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 1 ; line:55 col:30 + %tmp24 = load float, float* %tmp23, align 4 ; line:55 col:30 + %tmp25 = load i32, i32* %tmp, align 4 ; line:55 col:24 + %tmp26 = getelementptr <2 x float>, <2 x float>* %dyloc2, i32 0, i32 %tmp25 ; line:55 col:17 + store float %tmp24, float* %tmp26 ; line:55 col:28 + %tmp27 = getelementptr <2 x float>, <2 x float>* %stloc2, i32 0, i32 1 ; line:55 col:5 + store float %tmp24, float* 
%tmp27 ; line:55 col:15 + %tmp28 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 2 ; line:56 col:37 + %tmp29 = load float, float* %tmp28, align 4 ; line:56 col:37 + %tmp30 = load i32, i32* %tmp, align 4 ; line:56 col:27 + %tmp31 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp30 ; line:56 col:20 + %tmp32 = load i32, i32* %tmp, align 4 ; line:56 col:31 + %tmp33 = getelementptr <1 x float>, <1 x float>* %tmp31, i32 0, i32 %tmp32 ; line:56 col:20 + store float %tmp29, float* %tmp33 ; line:56 col:35 + %tmp34 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 1 ; line:56 col:5 + %tmp35 = getelementptr <1 x float>, <1 x float>* %tmp34, i32 0, i32 0 ; line:56 col:5 + store float %tmp29, float* %tmp35 ; line:56 col:18 + %tmp36 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 3 ; line:57 col:37 + %tmp37 = load float, float* %tmp36, align 4 ; line:57 col:37 + %tmp38 = load i32, i32* %tmp, align 4 ; line:57 col:27 + %tmp39 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp38 ; line:57 col:20 + %tmp40 = load i32, i32* %tmp, align 4 ; line:57 col:31 + %tmp41 = getelementptr <2 x float>, <2 x float>* %tmp39, i32 0, i32 %tmp40 ; line:57 col:20 + store float %tmp37, float* %tmp41 ; line:57 col:35 + %tmp42 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 1 ; line:57 col:5 + %tmp43 = getelementptr <2 x float>, <2 x float>* %tmp42, i32 0, i32 0 ; line:57 col:5 + store float %tmp37, float* %tmp43 ; line:57 col:18 + %tmp44 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 4 ; line:58 col:36 + %tmp45 = load float, float* %tmp44, align 4 ; line:58 col:36 + %tmp46 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:58 col:28 + %tmp47 = load i32, i32* %tmp, align 4 ; line:58 col:30 + %tmp48 = getelementptr <1 x float>, <1 x float>* %tmp46, i32 0, 
i32 %tmp47 ; line:58 col:20 + store float %tmp45, float* %tmp48 ; line:58 col:34 + %tmp49 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:58 col:13 + %tmp50 = getelementptr <1 x float>, <1 x float>* %tmp49, i32 0, i32 0 ; line:58 col:5 + store float %tmp45, float* %tmp50 ; line:58 col:18 + %tmp51 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 5 ; line:59 col:36 + %tmp52 = load float, float* %tmp51, align 4 ; line:59 col:36 + %tmp53 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:59 col:28 + %tmp54 = load i32, i32* %tmp, align 4 ; line:59 col:30 + %tmp55 = getelementptr <2 x float>, <2 x float>* %tmp53, i32 0, i32 %tmp54 ; line:59 col:20 + store float %tmp52, float* %tmp55 ; line:59 col:34 + %tmp56 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:59 col:13 + %tmp57 = getelementptr <2 x float>, <2 x float>* %tmp56, i32 0, i32 1 ; line:59 col:5 + store float %tmp52, float* %tmp57 ; line:59 col:18 + %tmp58 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 6 ; line:61 col:32 + %tmp59 = load float, float* %tmp58, align 4 ; line:61 col:32 + %tmp60 = load i32, i32* %tmp, align 4 ; line:61 col:26 + %tmp61 = getelementptr <1 x float>, <1 x float>* @dyglob1, i32 0, i32 %tmp60 ; line:61 col:18 + store float %tmp59, float* %tmp61 ; line:61 col:30 + store float %tmp59, float* getelementptr inbounds (<1 x float>, <1 x float>* @stglob1, i32 0, i32 0) ; line:61 col:16 + %tmp62 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 7 ; line:62 col:32 + %tmp63 = load float, float* %tmp62, align 4 ; line:62 col:32 + %tmp64 = load i32, i32* %tmp, align 4 ; line:62 col:26 + %tmp65 = getelementptr <2 x float>, <2 x float>* @dyglob2, i32 0, i32 %tmp64 ; line:62 col:18 + store float %tmp63, float* %tmp65 ; line:62 col:30 + store float %tmp63, float* getelementptr inbounds (<2 x float>, <2 x float>* 
@stglob2, i32 0, i32 1) ; line:62 col:16 + %tmp66 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 8 ; line:63 col:37 + %tmp67 = load float, float* %tmp66, align 4 ; line:63 col:37 + %tmp68 = load i32, i32* %tmp, align 4 ; line:63 col:27 + %tmp69 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp68 ; line:63 col:20 + %tmp70 = load i32, i32* %tmp, align 4 ; line:63 col:31 + %tmp71 = getelementptr <1 x float>, <1 x float>* %tmp69, i32 0, i32 %tmp70 ; line:63 col:20 + store float %tmp67, float* %tmp71 ; line:63 col:35 + store float %tmp67, float* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 1, i32 0) ; line:63 col:18 + %tmp72 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 9 ; line:64 col:37 + %tmp73 = load float, float* %tmp72, align 4 ; line:64 col:37 + %tmp74 = load i32, i32* %tmp, align 4 ; line:64 col:27 + %tmp75 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp74 ; line:64 col:20 + %tmp76 = load i32, i32* %tmp, align 4 ; line:64 col:31 + %tmp77 = getelementptr <2 x float>, <2 x float>* %tmp75, i32 0, i32 %tmp76 ; line:64 col:20 + store float %tmp73, float* %tmp77 ; line:64 col:35 + store float %tmp73, float* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 1, i32 1) ; line:64 col:18 + %tmp78 = getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 10 ; line:65 col:36 + %tmp79 = load float, float* %tmp78, align 4 ; line:65 col:36 + %tmp80 = load i32, i32* %tmp, align 4 ; line:65 col:30 + %tmp81 = getelementptr <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), i32 0, i32 %tmp80 ; line:65 col:20 + store float %tmp79, float* %tmp81 ; line:65 col:34 + store float %tmp79, float* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0, i32 0) ; line:65 col:18 + %tmp82 = 
getelementptr inbounds [12 x float], [12 x float]* %vals, i32 0, i32 11 ; line:66 col:36 + %tmp83 = load float, float* %tmp82, align 4 ; line:66 col:36 + %tmp84 = load i32, i32* %tmp, align 4 ; line:66 col:30 + %tmp85 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp84 ; line:66 col:20 + store float %tmp83, float* %tmp85 ; line:66 col:34 + store float %tmp83, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:66 col:18 + br label %bb86 ; line:67 col:3 + +bb86: ; preds = %bb17, %bb + %tmp87 = load <1 x float>, <1 x float>* %dyloc1, align 4 ; line:68 col:17 + %tmp88 = extractelement <1 x float> %tmp87, i32 0 ; line:68 col:17 + %tmp89 = load <2 x float>, <2 x float>* %dyloc2, align 4 ; line:68 col:27 + %tmp90 = extractelement <2 x float> %tmp89, i32 1 ; line:68 col:27 + %tmp91 = load <1 x float>, <1 x float>* %stloc1, align 4 ; line:68 col:37 + %tmp92 = extractelement <1 x float> %tmp91, i32 0 ; line:68 col:37 + %tmp93 = load <2 x float>, <2 x float>* %stloc2, align 4 ; line:68 col:47 + %tmp94 = extractelement <2 x float> %tmp93, i32 1 ; line:68 col:47 + %tmp95 = insertelement <4 x float> undef, float %tmp88, i64 0 ; line:68 col:16 + %tmp96 = insertelement <4 x float> %tmp95, float %tmp90, i64 1 ; line:68 col:16 + %tmp97 = insertelement <4 x float> %tmp96, float %tmp92, i64 2 ; line:68 col:16 + %tmp98 = insertelement <4 x float> %tmp97, float %tmp94, i64 3 ; line:68 col:16 + %tmp99 = load i32, i32* %tmp, align 4 ; line:68 col:73 + %tmp100 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %dylar1, i32 0, i32 %tmp99 ; line:68 col:66 + %tmp101 = load i32, i32* %tmp, align 4 ; line:68 col:77 + %tmp102 = getelementptr <1 x float>, <1 x float>* %tmp100, i32 0, i32 %tmp101 ; line:68 col:66 + %tmp103 = load float, float* %tmp102 ; line:68 col:66 + %tmp104 = load i32, i32* %tmp, align 4 ; line:68 col:89 + %tmp105 = 
getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %dylar2, i32 0, i32 %tmp104 ; line:68 col:82 + %tmp106 = load i32, i32* %tmp, align 4 ; line:68 col:93 + %tmp107 = getelementptr <2 x float>, <2 x float>* %tmp105, i32 0, i32 %tmp106 ; line:68 col:82 + %tmp108 = load float, float* %tmp107 ; line:68 col:82 + %tmp109 = getelementptr inbounds [3 x <1 x float>], [3 x <1 x float>]* %stlar1, i32 0, i32 0 ; line:68 col:98 + %tmp110 = load <1 x float>, <1 x float>* %tmp109, align 4 ; line:68 col:98 + %tmp111 = extractelement <1 x float> %tmp110, i32 0 ; line:68 col:98 + %tmp112 = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* %stlar2, i32 0, i32 0 ; line:68 col:111 + %tmp113 = load <2 x float>, <2 x float>* %tmp112, align 4 ; line:68 col:111 + %tmp114 = extractelement <2 x float> %tmp113, i32 1 ; line:68 col:111 + %tmp115 = insertelement <4 x float> undef, float %tmp103, i64 0 ; line:68 col:65 + %tmp116 = insertelement <4 x float> %tmp115, float %tmp108, i64 1 ; line:68 col:65 + %tmp117 = insertelement <4 x float> %tmp116, float %tmp111, i64 2 ; line:68 col:65 + %tmp118 = insertelement <4 x float> %tmp117, float %tmp114, i64 3 ; line:68 col:65 + %tmp119 = fadd <4 x float> %tmp98, %tmp118 ; line:68 col:57 + %tmp120 = load <1 x float>, <1 x float>* @dyglob1, align 4 ; line:69 col:10 + %tmp121 = extractelement <1 x float> %tmp120, i32 0 ; line:69 col:10 + %tmp122 = load <2 x float>, <2 x float>* @dyglob2, align 4 ; line:69 col:21 + %tmp123 = extractelement <2 x float> %tmp122, i32 1 ; line:69 col:21 + %tmp124 = load <1 x float>, <1 x float>* @stglob1, align 4 ; line:69 col:32 + %tmp125 = extractelement <1 x float> %tmp124, i32 0 ; line:69 col:32 + %tmp126 = load <2 x float>, <2 x float>* @stglob2, align 4 ; line:69 col:43 + %tmp127 = extractelement <2 x float> %tmp126, i32 1 ; line:69 col:43 + %tmp128 = insertelement <4 x float> undef, float %tmp121, i64 0 ; line:69 col:9 + %tmp129 = insertelement <4 x float> %tmp128, float %tmp123, i64 1 ; line:69 
col:9 + %tmp130 = insertelement <4 x float> %tmp129, float %tmp125, i64 2 ; line:69 col:9 + %tmp131 = insertelement <4 x float> %tmp130, float %tmp127, i64 3 ; line:69 col:9 + %tmp132 = fadd <4 x float> %tmp119, %tmp131 ; line:68 col:124 + %tmp133 = load i32, i32* %tmp, align 4 ; line:69 col:70 + %tmp134 = getelementptr inbounds [2 x <1 x float>], [2 x <1 x float>]* @dygar1, i32 0, i32 %tmp133 ; line:69 col:63 + %tmp135 = load i32, i32* %tmp, align 4 ; line:69 col:74 + %tmp136 = getelementptr <1 x float>, <1 x float>* %tmp134, i32 0, i32 %tmp135 ; line:69 col:63 + %tmp137 = load float, float* %tmp136 ; line:69 col:63 + %tmp138 = load i32, i32* %tmp, align 4 ; line:69 col:86 + %tmp139 = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* @dygar2, i32 0, i32 %tmp138 ; line:69 col:79 + %tmp140 = load i32, i32* %tmp, align 4 ; line:69 col:90 + %tmp141 = getelementptr <2 x float>, <2 x float>* %tmp139, i32 0, i32 %tmp140 ; line:69 col:79 + %tmp142 = load float, float* %tmp141 ; line:69 col:79 + %tmp143 = load <1 x float>, <1 x float>* getelementptr inbounds ([2 x <1 x float>], [2 x <1 x float>]* @stgar1, i32 0, i32 0), align 4 ; line:69 col:95 + %tmp144 = extractelement <1 x float> %tmp143, i32 0 ; line:69 col:95 + %tmp145 = load <2 x float>, <2 x float>* getelementptr inbounds ([3 x <2 x float>], [3 x <2 x float>]* @stgar2, i32 0, i32 0), align 4 ; line:69 col:108 + %tmp146 = extractelement <2 x float> %tmp145, i32 1 ; line:69 col:108 + %tmp147 = insertelement <4 x float> undef, float %tmp137, i64 0 ; line:69 col:62 + %tmp148 = insertelement <4 x float> %tmp147, float %tmp142, i64 1 ; line:69 col:62 + %tmp149 = insertelement <4 x float> %tmp148, float %tmp144, i64 2 ; line:69 col:62 + %tmp150 = insertelement <4 x float> %tmp149, float %tmp146, i64 3 ; line:69 col:62 + %tmp151 = fadd <4 x float> %tmp132, %tmp150 ; line:69 col:54 + %tmp152 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %stlorc1, i32 0, i32 0 ; line:70 col:20 + %tmp153 = load <1 x 
float>, <1 x float>* %tmp152, align 4 ; line:70 col:20 + %tmp154 = extractelement <1 x float> %tmp153, i64 0 ; line:70 col:11 + %tmp155 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %stlorc2, i32 0, i32 0 ; line:70 col:31 + %tmp156 = getelementptr <2 x float>, <2 x float>* %tmp155, i32 0, i32 1 ; line:70 col:23 + %tmp157 = load float, float* %tmp156 ; line:70 col:23 + %tmp158 = getelementptr inbounds %struct.VectRec1, %struct.VectRec1* %dylorc1, i32 0, i32 0 ; line:70 col:45 + %tmp159 = load <1 x float>, <1 x float>* %tmp158, align 4 ; line:70 col:45 + %tmp160 = extractelement <1 x float> %tmp159, i64 0 ; line:70 col:11 + %tmp161 = getelementptr inbounds %struct.VectRec2, %struct.VectRec2* %dylorc2, i32 0, i32 0 ; line:70 col:56 + %tmp162 = load i32, i32* %tmp, align 4 ; line:70 col:58 + %tmp163 = getelementptr <2 x float>, <2 x float>* %tmp161, i32 0, i32 %tmp162 ; line:70 col:48 + %tmp164 = load float, float* %tmp163 ; line:70 col:48 + %tmp165 = insertelement <4 x float> undef, float %tmp154, i64 0 ; line:70 col:11 + %tmp166 = insertelement <4 x float> %tmp165, float %tmp157, i64 1 ; line:70 col:11 + %tmp167 = insertelement <4 x float> %tmp166, float %tmp160, i64 2 ; line:70 col:11 + %tmp168 = insertelement <4 x float> %tmp167, float %tmp164, i64 3 ; line:70 col:11 + %tmp169 = fadd <4 x float> %tmp151, %tmp168 ; line:69 col:121 + %tmp170 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @stgrec1, i32 0, i32 0), align 4 ; line:70 col:80 + %tmp171 = extractelement <1 x float> %tmp170, i64 0 ; line:70 col:71 + %tmp172 = load float, float* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @stgrec2, i32 0, i32 0, i32 1) ; line:70 col:83 + %tmp173 = load <1 x float>, <1 x float>* getelementptr inbounds (%struct.VectRec1, %struct.VectRec1* @dygrec1, i32 0, i32 0), align 4 ; line:70 col:105 + %tmp174 = extractelement <1 x float> %tmp173, i64 0 ; line:70 col:71 + %tmp175 = load i32, i32* %tmp, align 4 ; 
line:70 col:118 + %tmp176 = getelementptr <2 x float>, <2 x float>* getelementptr inbounds (%struct.VectRec2, %struct.VectRec2* @dygrec2, i32 0, i32 0), i32 0, i32 %tmp175 ; line:70 col:108 + %tmp177 = load float, float* %tmp176 ; line:70 col:108 + %tmp178 = insertelement <4 x float> undef, float %tmp171, i64 0 ; line:70 col:71 + %tmp179 = insertelement <4 x float> %tmp178, float %tmp172, i64 1 ; line:70 col:71 + %tmp180 = insertelement <4 x float> %tmp179, float %tmp174, i64 2 ; line:70 col:71 + %tmp181 = insertelement <4 x float> %tmp180, float %tmp177, i64 3 ; line:70 col:71 + %tmp182 = fadd <4 x float> %tmp169, %tmp181 ; line:70 col:63 + ret <4 x float> %tmp182 ; line:68 col:3 +} + +attributes #0 = { nounwind } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !10} +!dx.entryPoints = !{!19} +!dx.fnprops = !{} +!dx.options = !{!23, !24} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %struct.VectRec1 undef, !6, %struct.VectRec2 undef, !8} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC1", i32 7, i32 9, i32 13, i32 1} +!8 = !{i32 8, !9} +!9 = !{i32 6, !"f", i32 3, i32 0, i32 4, !"REC2", i32 7, i32 9, i32 13, i32 2} +!10 = !{i32 1, <4 x float> (i32, [12 x float]*)* @"\01?tester@@YA?AV?$vector@M$03@@HY0M@M@Z", !11} +!11 = !{!12, !15, !17} +!12 = !{i32 1, !13, !14} +!13 = !{i32 7, i32 9, i32 13, i32 4} +!14 = !{} +!15 = !{i32 0, !16, !14} +!16 = !{i32 4, !"IX", i32 7, i32 4} +!17 = !{i32 0, !18, !14} +!18 = !{i32 4, !"VAL", i32 7, i32 9} +!19 = !{null, !"", null, !20, null} +!20 = !{null, null, !21, null} +!21 = !{!22} +!22 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!23 = !{i32 64} +!24 = !{i32 -1} +!25 = !{!26, !26, i64 0} +!26 = !{!"int", !27, i64 0} +!27 = !{!"omnipotent char", !28, i64 0} +!28 = !{!"Simple C/C++ TBAA"} diff --git 
a/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl new file mode 100644 index 0000000000..7641cb4f39 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-alloca-gv.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -fcgl -T lib_6_9 %s | FileCheck %s + +// Mainly a source for the ScalarReductionOfAggregatesHLSL(SROA) +// and DynamicIndexingVectorToArray(DIVA) IR tests with native vectors +// using allocas, static globals, and parameters. +// Dynamically accessed 1-element vectors should get skipped by SROA, +// but addressed by DynamicIndexingVectorToArray (hence the name). +// Larger vectors should be untouched. +// Arrays of vectors get some special treatment as well. +// Verifies that the original code is as expected for the IR tests. + +struct VectRec1 { + float1 f : REC1; +}; +struct VectRec2 { + float2 f : REC2; +}; + +// Vec2s will be preserved. +// CHECK-DAG: @dyglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Dynamic vec1s will get replaced with dynamic vector to array. +// CHECK-DAG: @dyglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @dygar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @dygrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +// Vec2s will be preserved. +// CHECK-DAG: @stglob2 = internal global <2 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar2 = internal global [3 x <2 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec2 = internal global %struct.VectRec2 zeroinitializer, align 4 + +// Static vec1s will get replaced with SROA. 
+// CHECK-DAG: @stglob1 = internal global <1 x float> zeroinitializer, align 4 +// CHECK-DAG: @stgar1 = internal global [2 x <1 x float>] zeroinitializer, align 4 +// CHECK-DAG: @stgrec1 = internal global %struct.VectRec1 zeroinitializer, align 4 + +static float1 dyglob1; +static float2 dyglob2; +static float1 dygar1[2]; +static float2 dygar2[3]; +static VectRec1 dygrec1; +static VectRec2 dygrec2; + +static float1 stglob1; +static float2 stglob2; +static float1 stgar1[2]; +static float2 stgar2[3]; +static VectRec1 stgrec1; +static VectRec2 stgrec2; + +// Test assignment operators. +// Vec2s should be skipped by SROA and DIVA +// DIVA will lower statically-indexed vectors and vectors in an array. +// CHECK-LABEL: define <4 x float> @"\01?tester +export float4 tester(int ix : IX, float vals[12] : VAL) { + + // Vec2s will be preserved. + // CHECK-DAG: %dyloc2 = alloca <2 x float>, align 4 + // CHECK-DAG: %dylar2 = alloca [4 x <2 x float>], align 4 + // CHECK-DAG: %dylorc2 = alloca %struct.VectRec2, align 4 + + // Dynamic local vec1s will get replaced with dynamic vector to array. + // CHECK-DAG: %dyloc1 = alloca <1 x float>, align 4 + // CHECK-DAG: %dylar1 = alloca [3 x <1 x float>], align 4 + // CHECK-DAG: %dylorc1 = alloca %struct.VectRec1, align 4 + + // Vec2s will be preserved. + // CHECK-DAG: %stloc2 = alloca <2 x float>, align 4 + // CHECK-DAG: %stlar2 = alloca [4 x <2 x float>], align 4 + // CHECK-DAG: %stlorc2 = alloca %struct.VectRec2, align 4 + + // Static local vec1s will get replaced by various passes. 
+ // CHECK-DAG: %stloc1 = alloca <1 x float>, align 4 + // CHECK-DAG: %stlar1 = alloca [3 x <1 x float>], align 4 + // CHECK-DAG: %stlorc1 = alloca %struct.VectRec1, align 4 + + float1 dyloc1; + float2 dyloc2; + float1 dylar1[3]; + float2 dylar2[4]; + VectRec1 dylorc1; + VectRec2 dylorc2; + + float1 stloc1; + float2 stloc2; + float1 stlar1[3]; + float2 stlar2[4]; + VectRec1 stlorc1; + VectRec2 stlorc2; + + if (ix > 0) { + stloc1[0] = dyloc1[ix] = vals[0]; + stloc2[1] = dyloc2[ix] = vals[1]; + stlar1[1][0] = dylar1[ix][ix] = vals[2]; + stlar2[1][0] = dylar2[ix][ix] = vals[3]; + stlorc1.f[0] = dylorc1.f[ix] = vals[4]; + stlorc2.f[1] = dylorc2.f[ix] = vals[5]; + + stglob1[0] = dyglob1[ix] = vals[6]; + stglob2[1] = dyglob2[ix] = vals[7]; + stgar1[1][0] = dygar1[ix][ix] = vals[8]; + stgar2[1][1] = dygar2[ix][ix] = vals[9]; + stgrec1.f[0] = dygrec1.f[ix] = vals[10]; + stgrec2.f[1] = dygrec2.f[ix] = vals[11]; + } + return float4(dyloc1.x, dyloc2.y, stloc1.x, stloc2.y) + float4(dylar1[ix][ix], dylar2[ix][ix], stlar1[0].x, stlar2[0].y) + + float4(dyglob1.x, dyglob2.y, stglob1.x, stglob2.y) + float4(dygar1[ix][ix], dygar2[ix][ix], stgar1[0].x, stgar2[0].y) + + float4(stlorc1.f, stlorc2.f[1], dylorc1.f, dylorc2.f[ix]) + float4(stgrec1.f, stgrec2.f[1], dygrec1.f, dygrec2.f[ix]); +} + diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl new file mode 100644 index 0000000000..11d705305d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.hlsl @@ -0,0 +1,186 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=13 %s | FileCheck %s + +// Source for dxilgen test CodeGenDXIL/passes/longvec-intrinsics.ll. +// Some targetted filecheck testing as an incidental. 
+ +RWStructuredBuffer > hBuf; +RWStructuredBuffer > fBuf; +RWStructuredBuffer > dBuf; + +RWStructuredBuffer > bBuf; +RWStructuredBuffer > uBuf; +RWStructuredBuffer > lBuf; + +[numthreads(8,1,1)] +void main() { + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + // CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + // CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f32 @dx.op.rawBufferVectorLoad.v13f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + // CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v13f32 [[ld]], 0 + vector fVec1 = fBuf[11]; + vector fVec2 = fBuf[12]; + vector fVec3 = fBuf[13]; + + // CHECK: [[tmp:%.*]] = call <13 x float> @dx.op.binary.v13f32(i32 35, <13 x float> [[fvec1]], <13 x float> [[fvec2]]) ; FMax(a,b) + // CHECK: call <13 x float> @dx.op.binary.v13f32(i32 36, <13 x float> [[tmp]], <13 x float> [[fvec3]]) ; FMin(a,b) + vector fRes = clamp(fVec1, fVec2, fVec3); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + // CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + // CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f16 @dx.op.rawBufferVectorLoad.v13f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + // CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v13f16 [[ld]], 0 + vector hVec1 = hBuf[14]; + vector hVec2 = hBuf[15]; + vector hVec3 = hBuf[16]; + + // 
CHECK: [[tmp:%.*]] = fcmp fast olt <13 x half> [[hvec2]], [[hvec1]] + // CHECK: select <13 x i1> [[tmp]], <13 x half> zeroinitializer, <13 x half> hRes = step(hVec1, hVec2); + + // CHECK: [[tmp:%.*]] = fmul fast <13 x float> [[fvec1]], @dx.op.unary.v13f32(i32 21, <13 x float> [[tmp]]) ; Exp(value) + fRes += exp(fVec1); + + // CHECK: [[tmp:%.*]] = call <13 x half> @dx.op.unary.v13f16(i32 23, <13 x half> [[hvec1]]) ; Log(value) + // CHECK: fmul fast <13 x half> [[tmp]], [[fvec2]], [[fvec1]] + // CHECK: [[xsub:%.*]] = fsub fast <13 x float> [[fvec3]], [[fvec1]] + // CHECK: [[div:%.*]] = fdiv fast <13 x float> [[xsub]], [[sub]] + // CHECK: [[sat:%.*]] = call <13 x float> @dx.op.unary.v13f32(i32 7, <13 x float> [[div]]) ; Saturate(value) + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], , [[mul]] + // CHECK: [[mul:%.*]] = fmul fast <13 x float> [[sat]], [[sat]] + // CHECK: fmul fast <13 x float> [[mul]], [[sub]] + fRes += smoothstep(fVec1, fVec2, fVec3); + + // Intrinsics that expand into llvm ops. 
+ + // CHECK: fmul fast <13 x float> [[fvec3]], [[fvec1]], zeroinitializer + // CHECK: [[f2i:%.*]] = bitcast <13 x float> [[fvec1]] to <13 x i32> + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], [[add]], [[shr]] to <13 x float> + // CHECK: [[sel:%.*]] = select <13 x i1> [[cmp]], <13 x float> [[i2f]], <13 x float> zeroinitializer + // CHECK: [[and:%.*]] = and <13 x i32> [[f2i]], [[and]], exp = fVec3; + fRes += frexp(fVec1, exp); + fRes += exp; + + // CHECK: [[tmp:%.*]] = fsub fast <13 x half> [[hvec3]], [[hvec2]] + // CHECK: fmul fast <13 x half> [[tmp]], [[hvec1]] + hRes += lerp(hVec2, hVec3, hVec1); + + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + // CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + // CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + vector uVec1 = uBuf[17]; + vector uVec2 = uBuf[18]; + + vector signs = 1; + // CHECK: [[cmp:%.*]] = icmp ne <13 x i32> [[uvec2]], zeroinitializer + // CHECK: zext <13 x i1> [[cmp]] to <13 x i32> + signs *= sign(uVec2); + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + // CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i64 @dx.op.rawBufferVectorLoad.v13i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + // CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v13i64 [[ld]], 0 + vector lVec1 = lBuf[19]; + vector lVec2 = lBuf[20]; + + // CHECK: [[gt:%.*]] = icmp sgt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[lt:%.*]] = icmp slt <13 x i64> [[lvec2]], zeroinitializer + // CHECK: [[igt:%.*]] = zext <13 x i1> [[gt]] to <13 x i32> + // CHECK: 
[[ilt:%.*]] = zext <13 x i1> [[lt]] to <13 x i32> + // CHECK: sub nsw <13 x i32> [[igt]], [[ilt]] + signs *= sign(lVec2); + + vector uRes = signs; + + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec1:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec2:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13i32 @dx.op.rawBufferVectorLoad.v13i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + // CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v13i32 [[ld]], 0 + // CHECK: [[bvec:%.*]] = icmp ne <13 x i32> [[vec]], zeroinitializer + // CHECK: [[vec3:%.*]] = zext <13 x i1> [[bvec]] to <13 x i32> + vector bVec1 = bBuf[21]; + vector bVec2 = bBuf[22]; + vector bVec3 = bBuf[23]; + + // CHECK: [[bvec2:%.*]] = icmp ne <13 x i32> [[vec2]], zeroinitializer + // CHECK: [[bvec1:%.*]] = icmp ne <13 x i32> [[vec1]], zeroinitializer + // CHECK: or <13 x i1> [[bvec2]], [[bvec1]] + uRes += or(bVec1, bVec2); + + // CHECK: [[bvec3:%.*]] = icmp ne <13 x i32> [[vec3]], zeroinitializer + // CHECK: and <13 x i1> [[bvec3]], [[bvec2]] + uRes += and(bVec2, bVec3); + + // CHECK: select <13 x i1> [[bvec3]], <13 x i64> [[lvec1]], <13 x i64> [[lvec2]] + vector lRes = select(bVec3, lVec1, lVec2); + + // CHECK: [[el1:%.*]] = extractelement <13 x float> [[fvec1]] + // CHECK: [[el2:%.*]] = extractelement <13 x float> [[fvec2]] + // CHECK: [[mul:%.*]] = fmul fast float [[el2]], [[el1]] + // CHECK: 
[[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mul]]) ; FMad(a,b,c) + // CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad1]]) ; FMad(a,b,c) + // CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad2]]) ; FMad(a,b,c) + // CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad3]]) ; FMad(a,b,c) + // CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad4]]) ; FMad(a,b,c) + // CHECK: [[mad6:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad5]]) ; FMad(a,b,c) + // CHECK: [[mad7:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad6]]) ; FMad(a,b,c) + // CHECK: [[mad8:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad7]]) ; FMad(a,b,c) + // CHECK: [[mad9:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad8]]) ; FMad(a,b,c) + // CHECK: [[mad10:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad9]]) ; FMad(a,b,c) + // CHECK: [[mad11:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad10]]) ; FMad(a,b,c) + // CHECK: [[mad12:%.*]] = call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float [[mad11]]) ; FMad(a,b,c) + fRes += dot(fVec1, fVec2); + + // CHECK: call <13 x float> @dx.op.unary.v13f32(i32 17, <13 x float> [[fvec1]]) ; Atan(value) + fRes += atan(fVec1); + + // CHECK: call <13 x i32> @dx.op.binary.v13i32(i32 40, <13 x i32> [[uvec1]], <13 x i32> [[uvec2]]) ; UMin(a,b) + uRes += min(uVec1, uVec2); + + // CHECK: call <13 x float> @dx.op.tertiary.v13f32(i32 46, <13 x float> [[fvec1]], <13 x float> [[fvec2]], <13 x float> [[fvec3]]) ; FMad(a,b,c) + fRes += mad(fVec1, fVec2, fVec3); + + // CHECK: 
[[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + // CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + // CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + // CHECK: [[ld:%.*]] = call %dx.types.ResRet.v13f64 @dx.op.rawBufferVectorLoad.v13f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + // CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v13f64 [[ld]], 0 + vector dVec1 = dBuf[24]; + vector dVec2 = dBuf[25]; + vector dVec3 = dBuf[26]; + + // CHECK: call <13 x double> @dx.op.tertiary.v13f64(i32 47, <13 x double> [[dvec1]], <13 x double> [[dvec2]], <13 x double> [[dvec3]]) + vector dRes = fma(dVec1, dVec2, dVec3); + + hBuf[0] = hRes; + fBuf[0] = fRes; + dBuf[0] = dRes; + uBuf[0] = uRes; + lBuf[0] = lRes; +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll new file mode 100644 index 0000000000..8f9dcbbdbc --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-intrinsics.ll @@ -0,0 +1,434 @@ +; RUN: %dxopt %s -dxilgen -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <7 x half> } +%"class.RWStructuredBuffer >" = type { <7 x float> } +%"class.RWStructuredBuffer >" = type { <7 x double> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i32> } +%"class.RWStructuredBuffer >" = type { <7 x i64> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" = external global %"class.RWStructuredBuffer >", align 2 
+@"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 +@"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" = external global %"class.RWStructuredBuffer >", align 8 + +; CHECK-LABEL: define void @main() +define void @main() #0 { +bb: + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 11, i32 0, i32 4) + ; CHECK: [[fvec1:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 12, i32 0, i32 4) + ; CHECK: [[fvec2:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f32 @dx.op.rawBufferVectorLoad.v7f32(i32 303, %dx.types.Handle {{%.*}}, i32 13, i32 0, i32 4) + ; CHECK: [[fvec3:%.*]] = extractvalue %dx.types.ResRet.v7f32 [[ld]], 0 + + %exp = alloca <7 x float>, align 4 + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:23 col:30 + %tmp1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp) ; line:23 col:30 + %tmp2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:23 col:30 + %tmp3 = call <7 x float>* 
@"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp2, i32 11) ; line:23 col:30 + %tmp4 = load <7 x float>, <7 x float>* %tmp3 ; line:23 col:30 + %tmp5 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:24 col:30 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp5) ; line:24 col:30 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:24 col:30 + %tmp8 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp7, i32 12) ; line:24 col:30 + %tmp9 = load <7 x float>, <7 x float>* %tmp8 ; line:24 col:30 + %tmp10 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:25 col:30 + %tmp11 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp10) ; line:25 col:30 + %tmp12 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:25 col:30 + %tmp13 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp12, i32 13) ; line:25 col:30 + %tmp14 = load <7 x float>, <7 x float>* %tmp13 ; line:25 col:30 + + ; Clamp operation. 
+ ; CHECK: [[max:%.*]] = call <7 x float> @dx.op.binary.v7f32(i32 35, <7 x float> [[fvec1]], <7 x float> [[fvec2]]) + ; CHECK: call <7 x float> @dx.op.binary.v7f32(i32 36, <7 x float> [[max]], <7 x float> [[fvec3]]) + %tmp15 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 119, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:29 col:29 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 14, i32 0, i32 2) + ; CHECK: [[hvec1:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 15, i32 0, i32 2) + ; CHECK: [[hvec2:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle {{%.*}}, i32 16, i32 0, i32 2) + ; CHECK: [[hvec3:%.*]] = extractvalue %dx.types.ResRet.v7f16 [[ld]], 0 + %tmp16 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:37 col:34 + %tmp17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp16) ; line:37 col:34 + %tmp18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp17, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:37 col:34 + %tmp19 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp18, i32 14) ; line:37 col:34 + %tmp20 = load <7 x half>, <7 x half>* %tmp19 ; line:37 col:34 + %tmp21 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* 
@"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:38 col:34 + %tmp22 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp21) ; line:38 col:34 + %tmp23 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp22, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:38 col:34 + %tmp24 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp23, i32 15) ; line:38 col:34 + %tmp25 = load <7 x half>, <7 x half>* %tmp24 ; line:38 col:34 + %tmp26 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:39 col:34 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp26) ; line:39 col:34 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:39 col:34 + %tmp29 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp28, i32 16) ; line:39 col:34 + %tmp30 = load <7 x half>, <7 x half>* %tmp29 ; line:39 col:34 + + ; Step operation. + ; CHECK: [[cmp:%.*]] = fcmp fast olt <7 x half> [[hvec2]], [[hvec1]] + ; CHECK: select <7 x i1> [[cmp]], <7 x half> zeroinitializer, <7 x half> + %tmp31 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32 192, <7 x half> %tmp20, <7 x half> %tmp25) ; line:43 col:33 + + ; Exp operation. 
+ ; CHECK: [[mul:%.*]] = fmul fast <7 x float> , [[fvec1]] + ; CHECK call <7 x float> @dx.op.unary.v7f32(i32 21, <7 x float> [[mul]]) + %tmp32 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 139, <7 x float> %tmp4) ; line:47 col:11 + %tmp33 = fadd <7 x float> %tmp15, %tmp32 ; line:47 col:8 + + ; Log operation. + ; CHECK: [[log:%.*]] = call <7 x half> @dx.op.unary.v7f16(i32 23, <7 x half> [[hvec1]]) + ; CHECK: fmul fast <7 x half> , [[log]] + %tmp34 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32 159, <7 x half> %tmp20) ; line:51 col:11 + %tmp35 = fadd <7 x half> %tmp31, %tmp34 ; line:51 col:8 + + ; Smoothstep operation. + ; CHECK: [[sub1:%.*]] = fsub fast <7 x float> [[fvec2]], [[fvec1]] + ; CHECK: [[sub2:%.*]] = fsub fast <7 x float> [[fvec3]], [[fvec1]] + ; CHECK: [[div:%.*]] = fdiv fast <7 x float> [[sub2]], [[sub1]] + ; CHECK: [[sat:%.*]] = call <7 x float> @dx.op.unary.v7f32(i32 7, <7 x float> [[div]]) + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], + ; CHECK: [[sub:%.*]] = fsub fast <7 x float> , [[mul]] + ; CHECK: [[mul:%.*]] = fmul fast <7 x float> [[sat]], [[sub]] + ; CHECK: fmul fast <7 x float> %Saturate, [[mul]] + %tmp36 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 189, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:61 col:11 + %tmp37 = fadd <7 x float> %tmp33, %tmp36 ; line:61 col:8 + + ; Radians operation. + ; CHECK: fmul fast <7 x float> , [[fvec3]] + %tmp38 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 176, <7 x float> %tmp14) ; line:66 col:11 + %tmp39 = fadd <7 x float> %tmp37, %tmp38 ; line:66 col:8 + store <7 x float> %tmp14, <7 x float>* %exp, align 4 ; line:77 col:22 + + ; Frexp operation. 
+ ; CHECK: [[cmp:%.*]] = fcmp fast une <7 x float> [[fvec1]], zeroinitializer + ; CHECK: [[ext:%.*]] = sext <7 x i1> [[cmp]] to <7 x i32> + ; CHECK: [[bct:%.*]] = bitcast <7 x float> [[fvec1]] to <7 x i32> + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[add:%.*]] = add <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[add]], [[ext]] + ; CHECK: [[shr:%.*]] = ashr <7 x i32> [[and]], + ; CHECK: [[i2f:%.*]] = sitofp <7 x i32> [[shr]] to <7 x float> + ; CHECK: store <7 x float> [[i2f]], <7 x float>* %exp + ; CHECK: [[and:%.*]] = and <7 x i32> [[bct]], + ; CHECK: [[or:%.*]] = or <7 x i32> [[and]], + ; CHECK: [[and:%.*]] = and <7 x i32> [[or]], [[ext]] + ; CHECK: bitcast <7 x i32> [[and]] to <7 x float> + %tmp41 = call <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32 150, <7 x float> %tmp4, <7 x float>* %exp) ; line:78 col:11 + %tmp42 = fadd <7 x float> %tmp39, %tmp41 ; line:78 col:8 + %tmp43 = load <7 x float>, <7 x float>* %exp, align 4 ; line:79 col:11 + %tmp44 = fadd <7 x float> %tmp42, %tmp43 ; line:79 col:8 + + ; Lerp operation. 
+ ; CHECK: [[sub:%.*]] = fsub fast <7 x half> [[hvec3]], [[hvec2]] + ; CHECK: fmul fast <7 x half> [[hvec1]], [[sub]] + %tmp45 = call <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32 157, <7 x half> %tmp25, <7 x half> %tmp30, <7 x half> %tmp20) ; line:83 col:11 + %tmp46 = fadd <7 x half> %tmp35, %tmp45 ; line:83 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 17, i32 0, i32 4) + ; CHECK: [[uvec1:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 18, i32 0, i32 4) + ; CHECK: [[uvec2:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + %tmp47 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:90 col:29 + %tmp48 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp47) ; line:90 col:29 + %tmp49 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp48, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:90 col:29 + %tmp50 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp49, i32 17) ; line:90 col:29 + %tmp51 = load <7 x i32>, <7 x i32>* %tmp50 ; line:90 col:29 + %tmp52 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:91 col:29 + %tmp53 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp52) ; line:91 col:29 + %tmp54 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp53, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:91 col:29 + %tmp55 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp54, i32 18) ; line:91 col:29 + %tmp56 = load <7 x i32>, <7 x i32>* %tmp55 ; line:91 col:29 + + ; Unsigned int sign operation. + ; CHECK: [[cmp:%.*]] = icmp ne <7 x i32> [[uvec2]], zeroinitializer + ; CHECK: zext <7 x i1> [[cmp]] to <7 x i32> + %tmp57 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32 355, <7 x i32> %tmp56) ; line:96 col:12 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 19, i32 0, i32 8) + ; CHECK: [[lvec1:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i64 @dx.op.rawBufferVectorLoad.v7i64(i32 303, %dx.types.Handle {{%.*}}, i32 20, i32 0, i32 8) + ; CHECK: [[lvec2:%.*]] = extractvalue %dx.types.ResRet.v7i64 [[ld]], 0 + %tmp58 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:102 col:32 + %tmp59 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp58) ; line:102 col:32 + %tmp60 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp59, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:102 col:32 + %tmp61 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp60, i32 19) ; line:102 col:32 + %tmp62 = load <7 x 
i64>, <7 x i64>* %tmp61 ; line:102 col:32 + %tmp63 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:103 col:32 + %tmp64 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp63) ; line:103 col:32 + %tmp65 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp64, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:103 col:32 + %tmp66 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp65, i32 20) ; line:103 col:32 + %tmp67 = load <7 x i64>, <7 x i64>* %tmp66 ; line:103 col:32 + + ; Signed int sign operation. + ; CHECK: [[lt1:%.*]] = icmp slt <7 x i64> zeroinitializer, [[lvec2]] + ; CHECK: [[lt2:%.*]] = icmp slt <7 x i64> [[lvec2]], zeroinitializer + ; CHECK: [[ilt1:%.*]] = zext <7 x i1> [[lt1]] to <7 x i32> + ; CHECK: [[ilt2:%.*]] = zext <7 x i1> [[lt2]] to <7 x i32> + ; CHECK: sub <7 x i32> [[ilt1]], [[ilt2]] + %tmp68 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32 185, <7 x i64> %tmp67) ; line:110 col:12 + %tmp69 = mul <7 x i32> %tmp57, %tmp68 ; line:110 col:9 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 21, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec1:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 22, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp 
ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec2:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7i32 @dx.op.rawBufferVectorLoad.v7i32(i32 303, %dx.types.Handle {{%.*}}, i32 23, i32 0, i32 4) + ; CHECK: [[vec:%.*]] = extractvalue %dx.types.ResRet.v7i32 [[ld]], 0 + ; CHECK: [[bvec:%.*]] = icmp ne <7 x i32> [[vec]], zeroinitializer + ; CHECK: [[vec3:%.*]] = zext <7 x i1> [[bvec]] to <7 x i32> + %tmp70 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:126 col:29 + %tmp71 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp70) ; line:126 col:29 + %tmp72 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp71, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:126 col:29 + %tmp73 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp72, i32 21) ; line:126 col:29 + %tmp74 = load <7 x i32>, <7 x i32>* %tmp73 ; line:126 col:29 + %tmp75 = icmp ne <7 x i32> %tmp74, zeroinitializer ; line:126 col:29 + %tmp76 = zext <7 x i1> %tmp75 to <7 x i32> ; line:126 col:21 + %tmp77 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:127 col:29 + %tmp78 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp77) ; line:127 col:29 + %tmp79 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp78, %dx.types.ResourceProperties { i32 
4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:127 col:29 + %tmp80 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp79, i32 22) ; line:127 col:29 + %tmp81 = load <7 x i32>, <7 x i32>* %tmp80 ; line:127 col:29 + %tmp82 = icmp ne <7 x i32> %tmp81, zeroinitializer ; line:127 col:29 + %tmp83 = zext <7 x i1> %tmp82 to <7 x i32> ; line:127 col:21 + %tmp84 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A" ; line:128 col:29 + %tmp85 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp84) ; line:128 col:29 + %tmp86 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp85, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:128 col:29 + %tmp87 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp86, i32 23) ; line:128 col:29 + %tmp88 = load <7 x i32>, <7 x i32>* %tmp87 ; line:128 col:29 + %tmp89 = icmp ne <7 x i32> %tmp88, zeroinitializer ; line:128 col:29 + %tmp90 = zext <7 x i1> %tmp89 to <7 x i32> ; line:128 col:21 + + + ; Or() operation. 
+ ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: [[bvec1:%.*]] = icmp ne <7 x i32> [[vec1]], zeroinitializer + ; CHECK: or <7 x i1> [[bvec1]], [[bvec2]] + %tmp91 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:133 col:21 + %tmp92 = icmp ne <7 x i32> %tmp76, zeroinitializer ; line:133 col:14 + %tmp93 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 169, <7 x i1> %tmp92, <7 x i1> %tmp91) ; line:133 col:11 + %tmp94 = zext <7 x i1> %tmp93 to <7 x i32> ; line:133 col:11 + %tmp95 = add <7 x i32> %tmp69, %tmp94 ; line:133 col:8 + + ; And() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: [[bvec2:%.*]] = icmp ne <7 x i32> [[vec2]], zeroinitializer + ; CHECK: and <7 x i1> [[bvec2]], [[bvec3]] + %tmp96 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:137 col:22 + %tmp97 = icmp ne <7 x i32> %tmp83, zeroinitializer ; line:137 col:15 + %tmp98 = call <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32 106, <7 x i1> %tmp97, <7 x i1> %tmp96) ; line:137 col:11 + %tmp99 = zext <7 x i1> %tmp98 to <7 x i32> ; line:137 col:11 + %tmp100 = add <7 x i32> %tmp95, %tmp99 ; line:137 col:8 + + ; Select() operation. + ; CHECK: [[bvec3:%.*]] = icmp ne <7 x i32> [[vec3]], zeroinitializer + ; CHECK: select <7 x i1> [[bvec3]], <7 x i64> [[lvec1]], <7 x i64> [[lvec2]] + %tmp101 = icmp ne <7 x i32> %tmp90, zeroinitializer ; line:140 col:38 + %tmp102 = call <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32 184, <7 x i1> %tmp101, <7 x i64> %tmp62, <7 x i64> %tmp67) ; line:140 col:31 + %tmp103 = call float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32 134, <7 x float> %tmp4, <7 x float> %tmp9) ; line:152 col:11 + + ; Dot operation. 
+ ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 0 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 0 + ; CHECK: [[mul:%.*]] = fmul fast float [[el1]], [[el2]] + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 1 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 1 + ; CHECK: [[mad1:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mul]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 2 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 2 + ; CHECK: [[mad2:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad1]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 3 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 3 + ; CHECK: [[mad3:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad2]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 4 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 4 + ; CHECK: [[mad4:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad3]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 5 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 5 + ; CHECK: [[mad5:%.*]] = call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad4]]) + ; CHECK: [[el1:%.*]] = extractelement <7 x float> [[fvec1]], i64 6 + ; CHECK: [[el2:%.*]] = extractelement <7 x float> [[fvec2]], i64 6 + ; CHECK: call float @dx.op.tertiary.f32(i32 46, float [[el1]], float [[el2]], float [[mad5]]) + %tmp104 = insertelement <7 x float> undef, float %tmp103, i32 0 ; line:152 col:11 + %tmp105 = shufflevector <7 x float> %tmp104, <7 x float> undef, <7 x i32> zeroinitializer ; line:152 col:11 + %tmp106 = fadd <7 x float> %tmp44, %tmp105 ; line:152 col:8 + + ; Atan operation. 
+ ; CHECK: call <7 x float> @dx.op.unary.v7f32(i32 17, <7 x float> [[fvec1]]) + %tmp107 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32 116, <7 x float> %tmp4) ; line:155 col:11 + %tmp108 = fadd <7 x float> %tmp106, %tmp107 ; line:155 col:8 + + ; Min operation. + ; CHECK: call <7 x i32> @dx.op.binary.v7i32(i32 40, <7 x i32> [[uvec1]], <7 x i32> [[uvec2]]) + %tmp109 = call <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32 353, <7 x i32> %tmp51, <7 x i32> %tmp56) ; line:158 col:11 + %tmp110 = add <7 x i32> %tmp100, %tmp109 ; line:158 col:8 + + ; Mad operation. + ; CHECK: call <7 x float> @dx.op.tertiary.v7f32(i32 46, <7 x float> [[fvec1]], <7 x float> [[fvec2]], <7 x float> [[fvec3]]) + %tmp111 = call <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32 162, <7 x float> %tmp4, <7 x float> %tmp9, <7 x float> %tmp14) ; line:161 col:11 + %tmp112 = fadd <7 x float> %tmp108, %tmp111 ; line:161 col:8 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 24, i32 0, i32 8) + ; CHECK: [[dvec1:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 25, i32 0, i32 8) + ; CHECK: [[dvec2:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.v7f64 @dx.op.rawBufferVectorLoad.v7f64(i32 303, %dx.types.Handle {{%.*}}, i32 26, i32 0, i32 8) + ; CHECK: [[dvec3:%.*]] = extractvalue %dx.types.ResRet.v7f64 [[ld]], 0 + %tmp113 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:169 col:31 + %tmp114 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp113) ; line:169 col:31 + %tmp115 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp114, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:169 col:31 + %tmp116 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp115, i32 24) ; line:169 col:31 + %tmp117 = load <7 x double>, <7 x double>* %tmp116 ; line:169 col:31 + %tmp118 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:170 col:31 + %tmp119 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp118) ; line:170 col:31 + %tmp120 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp119, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:170 col:31 + %tmp121 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp120, i32 25) ; line:170 col:31 + %tmp122 = load <7 x double>, <7 x double>* %tmp121 ; line:170 col:31 + %tmp123 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:171 col:31 + %tmp124 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp123) ; line:171 col:31 + %tmp125 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp124, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" 
zeroinitializer) ; line:171 col:31 + %tmp126 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp125, i32 26) ; line:171 col:31 + %tmp127 = load <7 x double>, <7 x double>* %tmp126 ; line:171 col:31 + + ; FMA operation. + ; CHECK: call <7 x double> @dx.op.tertiary.v7f64(i32 47, <7 x double> [[dvec1]], <7 x double> [[dvec2]], <7 x double> [[dvec3]]) + %tmp128 = call <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32 147, <7 x double> %tmp117, <7 x double> %tmp122, <7 x double> %tmp127) ; line:174 col:30 + %tmp129 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A" ; line:176 col:3 + %tmp130 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp129) ; line:176 col:3 + %tmp131 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp130, %dx.types.ResourceProperties { i32 4108, i32 14 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:176 col:3 + %tmp132 = call <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp131, i32 0) ; line:176 col:3 + store <7 x half> %tmp46, <7 x half>* %tmp132 ; line:176 col:11 + %tmp133 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A" ; line:177 col:3 + %tmp134 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp133) ; line:177 col:3 + %tmp135 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle 
%tmp134, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:177 col:3 + %tmp136 = call <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp135, i32 0) ; line:177 col:3 + store <7 x float> %tmp112, <7 x float>* %tmp136 ; line:177 col:11 + %tmp137 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A" ; line:178 col:3 + %tmp138 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp137) ; line:178 col:3 + %tmp139 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp138, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:178 col:3 + %tmp140 = call <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp139, i32 0) ; line:178 col:3 + store <7 x double> %tmp128, <7 x double>* %tmp140 ; line:178 col:11 + %tmp141 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A" ; line:179 col:3 + %tmp142 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp141) ; line:179 col:3 + %tmp143 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp142, %dx.types.ResourceProperties { i32 4108, i32 28 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:179 col:3 + %tmp144 = call <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp143, i32 0) ; 
line:179 col:3 + store <7 x i32> %tmp110, <7 x i32>* %tmp144 ; line:179 col:11 + %tmp145 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A" ; line:180 col:3 + %tmp146 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %tmp145) ; line:180 col:3 + %tmp147 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %tmp146, %dx.types.ResourceProperties { i32 4108, i32 56 }, %"class.RWStructuredBuffer >" zeroinitializer) ; line:180 col:3 + %tmp148 = call <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %tmp147, i32 0) ; line:180 col:3 + store <7 x i64> %tmp102, <7 x i64>* %tmp148 ; line:180 col:11 + ret void ; line:181 col:1 +} + +declare <7 x float>* @"dx.hl.subscript.[].rn.<7 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>, <7 x float>) #1 +declare <7 x half>* @"dx.hl.subscript.[].rn.<7 x half>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>) #1 +declare <7 x float> @"dx.hl.op.rn.<7 x float> (i32, <7 x float>)"(i32, <7 x float>) #1 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>)"(i32, <7 x half>) #1 +declare <7 x float> @"dx.hl.op..<7 x float> (i32, <7 x float>, <7 x float>*)"(i32, <7 x float>, <7 x float>*) #0 +declare <7 x half> @"dx.hl.op.rn.<7 x half> (i32, <7 x half>, <7 x half>, <7 x half>)"(i32, <7 x half>, <7 x half>, <7 x half>) #1 +declare <7 x i32>* @"dx.hl.subscript.[].rn.<7 x i32>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>)"(i32, <7 x i32>) #1 +declare <7 x i64>* @"dx.hl.subscript.[].rn.<7 x i64>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i64>)"(i32, <7 x i64>) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 
+declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x i1> @"dx.hl.op.rn.<7 x i1> (i32, <7 x i1>, <7 x i1>)"(i32, <7 x i1>, <7 x i1>) #1 +declare <7 x i64> @"dx.hl.op.rn.<7 x i64> (i32, <7 x i1>, <7 x i64>, <7 x i64>)"(i32, <7 x i1>, <7 x i64>, <7 x i64>) #1 +declare float @"dx.hl.op.rn.float (i32, <7 x float>, <7 x float>)"(i32, <7 x float>, <7 x float>) #1 +declare <7 x i32> @"dx.hl.op.rn.<7 x i32> (i32, <7 x i32>, <7 x i32>)"(i32, <7 x i32>, <7 x i32>) #1 +declare <7 x double>* @"dx.hl.subscript.[].rn.<7 x double>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 +declare <7 x double> @"dx.hl.op.rn.<7 x double> (i32, <7 x double>, <7 x double>, <7 x double>)"(i32, <7 x double>, <7 x double>, <7 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!1} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !36} +!dx.entryPoints = !{!40} +!dx.fnprops = !{!52} +!dx.options = !{!53, !54} + +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!3 = !{i32 1, i32 9} +!4 = !{!"cs", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer >" undef, !6, %"class.RWStructuredBuffer >" undef, !11, %"class.RWStructuredBuffer >" undef, !16, %"class.RWStructuredBuffer >" undef, !21, %"class.RWStructuredBuffer >" undef, !26, %"class.RWStructuredBuffer >" undef, !31} +!6 = !{i32 14, !7, !8} +!7 = !{i32 6, 
!"h", i32 3, i32 0, i32 7, i32 8, i32 13, i32 7} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, <7 x half> undef} +!11 = !{i32 28, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9, i32 13, i32 7} +!13 = !{i32 0, !14} +!14 = !{!15} +!15 = !{i32 0, <7 x float> undef} +!16 = !{i32 56, !17, !18} +!17 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 10, i32 13, i32 7} +!18 = !{i32 0, !19} +!19 = !{!20} +!20 = !{i32 0, <7 x double> undef} +!21 = !{i32 28, !22, !23} +!22 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 1, i32 13, i32 7} +!23 = !{i32 0, !24} +!24 = !{!25} +!25 = !{i32 0, <7 x i1> undef} +!26 = !{i32 28, !27, !28} +!27 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5, i32 13, i32 7} +!28 = !{i32 0, !29} +!29 = !{!30} +!30 = !{i32 0, <7 x i32> undef} +!31 = !{i32 56, !32, !33} +!32 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 6, i32 13, i32 7} +!33 = !{i32 0, !34} +!34 = !{!35} +!35 = !{i32 0, <7 x i64> undef} +!36 = !{i32 1, void ()* @main, !37} +!37 = !{!38} +!38 = !{i32 1, !39, !39} +!39 = !{} +!40 = !{void ()* @main, !"main", null, !41, null} +!41 = !{null, !42, null, null} +!42 = !{!43, !45, !47, !49, !50, !51} +!43 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?hBuf@@3V?$RWStructuredBuffer@V?$vector@$f16@$06@@@@A", !"hBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !44} +!44 = !{i32 1, i32 14} +!45 = !{i32 1, %"class.RWStructuredBuffer >"* @"\01?fBuf@@3V?$RWStructuredBuffer@V?$vector@M$06@@@@A", !"fBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!46 = !{i32 1, i32 28} +!47 = !{i32 2, %"class.RWStructuredBuffer >"* @"\01?dBuf@@3V?$RWStructuredBuffer@V?$vector@N$06@@@@A", !"dBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!48 = !{i32 1, i32 56} +!49 = !{i32 3, %"class.RWStructuredBuffer >"* @"\01?bBuf@@3V?$RWStructuredBuffer@V?$vector@_N$06@@@@A", !"bBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!50 = !{i32 4, %"class.RWStructuredBuffer >"* 
@"\01?uBuf@@3V?$RWStructuredBuffer@V?$vector@I$06@@@@A", !"uBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !46} +!51 = !{i32 5, %"class.RWStructuredBuffer >"* @"\01?lBuf@@3V?$RWStructuredBuffer@V?$vector@_J$06@@@@A", !"lBuf", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !48} +!52 = !{void ()* @main, i32 5, i32 8, i32 1, i32 1} +!53 = !{i32 0} +!54 = !{i32 -1} +!59 = !{!60, !60, i64 0} +!60 = !{!"omnipotent char", !61, i64 0} +!61 = !{!"Simple C/C++ TBAA"} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll new file mode 100644 index 0000000000..f9a9b3d677 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-load-stores-scalarizevecldst.ll @@ -0,0 +1,478 @@ +; RUN: %dxopt %s -hlsl-passes-resume -hlsl-dxil-scalarize-vector-load-stores -S | FileCheck %s + +; Verify that scalarize vector load stores pass will convert raw buffer vector operations +; into the equivalent collection of scalar load store calls. +; Sourced from buffer-load-stores-sm69.hlsl. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v17f32 = type { <17 x float>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%"class.StructuredBuffer >" = type { <17 x float> } +%struct.RWByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <17 x float> } +%"class.ConsumeStructuredBuffer >" = type { <17 x float> } +%"class.AppendStructuredBuffer >" = type { <17 x float> } + +@"\01?RoByBuf@@3UByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwByBuf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 +@"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 +@"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" = external constant %dx.types.Handle, align 4 + +define void @main() { +bb: + %tmp = load %dx.types.Handle, %dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A", align 4 + %tmp2 = load %dx.types.Handle, %dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp3 = load %dx.types.Handle, %dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp4 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A", align 4 + %tmp5 = load %dx.types.Handle, %dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A", align 4 + %tmp6 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) 
+ %tmp7 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp5) + %tmp8 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 4107, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp9 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, i32 4) + %tmp10 = extractvalue 
%dx.types.ResRet.v17f32 %tmp9, 0 + %tmp11 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp1) + %tmp12 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp11, %dx.types.ResourceProperties { i32 11, i32 0 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix1]], i32 undef, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix2]], i32 undef, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix3]], i32 undef, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: 
[[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp12, i32 [[ix4]], i32 undef, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp13 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp12, i32 %tmp6, i32 undef, i32 4) 
+ %tmp14 = extractvalue %dx.types.ResRet.v17f32 %tmp13, 0 + %tmp15 = fadd fast <17 x float> %tmp14, %tmp10 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp15, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp15, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp15, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp15, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[ix1:%.*]] = add i32 %tmp6, 16 + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp15, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp15, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp15, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp15, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix1]], i32 undef, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[ix2:%.*]] = add i32 [[ix1]], 16 + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp15, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp15, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp15, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp15, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix2]], i32 undef, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[ix3:%.*]] = add i32 [[ix2]], 16 + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp15, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp15, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp15, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp15, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix3]], i32 undef, float 
[[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[ix4:%.*]] = add i32 [[ix3]], 16 + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp15, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp8, i32 [[ix4]], i32 undef, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp8, i32 %tmp6, i32 undef, <17 x float> %tmp15, i32 4) + %tmp16 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp4) + %tmp17 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 4108, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 
[[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> 
[[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp18 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, i32 4) + %tmp19 = extractvalue %dx.types.ResRet.v17f32 %tmp18, 0 + %tmp20 = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 1, i8 0, i32 undef) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue 
%dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp17, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp21 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp17, i32 %tmp20, i32 0, i32 4) + %tmp22 = extractvalue %dx.types.ResRet.v17f32 %tmp21, 0 + %tmp23 = call 
%dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp) + %tmp24 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 12, i32 68 }) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 
@dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp6, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp25 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp6, i32 0, i32 4) + %tmp26 = extractvalue %dx.types.ResRet.v17f32 %tmp25, 0 + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = 
extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp24, i32 %tmp20, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; 
CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp27 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp24, i32 %tmp20, i32 0, i32 4) + %tmp28 = extractvalue %dx.types.ResRet.v17f32 %tmp27, 0 + %tmp29 = fadd fast <17 x float> %tmp22, %tmp19 + %tmp30 = fadd fast <17 x float> %tmp29, %tmp26 + %tmp31 = fadd fast <17 x float> %tmp30, %tmp28 + + ; CHECK: [[val0:%.*]] = extractelement <17 x float> %tmp31, i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> %tmp31, i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> %tmp31, i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> %tmp31, i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 
4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> %tmp31, i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> %tmp31, i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> %tmp31, i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> %tmp31, i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> %tmp31, i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> %tmp31, i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> %tmp31, i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> %tmp31, i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> %tmp31, i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> %tmp31, i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> %tmp31, i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> %tmp31, i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> %tmp31, i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp17, i32 %tmp6, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp17, i32 %tmp6, i32 0, <17 x float> %tmp31, i32 4) + %tmp32 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp3) + %tmp33 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp32, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp34 = 
call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp33, i8 -1) + + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i8 15, i32 4) + ; CHECK: [[val0:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val1:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val2:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val3:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 16, i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val5:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val6:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val7:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 32, i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val9:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val10:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val11:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 48, i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: [[val13:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 1 + ; CHECK: [[val14:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 2 + ; CHECK: [[val15:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 3 + ; CHECK: [[ld:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp33, i32 %tmp34, i32 64, i8 1, i32 4) + ; CHECK: [[val16:%.*]] = extractvalue %dx.types.ResRet.f32 [[ld]], 0 + ; CHECK: 
[[vec0:%.*]] = insertelement <17 x float> undef, float [[val0]], i64 0 + ; CHECK: [[vec1:%.*]] = insertelement <17 x float> [[vec0]], float [[val1]], i64 1 + ; CHECK: [[vec2:%.*]] = insertelement <17 x float> [[vec1]], float [[val2]], i64 2 + ; CHECK: [[vec3:%.*]] = insertelement <17 x float> [[vec2]], float [[val3]], i64 3 + ; CHECK: [[vec4:%.*]] = insertelement <17 x float> [[vec3]], float [[val4]], i64 4 + ; CHECK: [[vec5:%.*]] = insertelement <17 x float> [[vec4]], float [[val5]], i64 5 + ; CHECK: [[vec6:%.*]] = insertelement <17 x float> [[vec5]], float [[val6]], i64 6 + ; CHECK: [[vec7:%.*]] = insertelement <17 x float> [[vec6]], float [[val7]], i64 7 + ; CHECK: [[vec8:%.*]] = insertelement <17 x float> [[vec7]], float [[val8]], i64 8 + ; CHECK: [[vec9:%.*]] = insertelement <17 x float> [[vec8]], float [[val9]], i64 9 + ; CHECK: [[vec10:%.*]] = insertelement <17 x float> [[vec9]], float [[val10]], i64 10 + ; CHECK: [[vec11:%.*]] = insertelement <17 x float> [[vec10]], float [[val11]], i64 11 + ; CHECK: [[vec12:%.*]] = insertelement <17 x float> [[vec11]], float [[val12]], i64 12 + ; CHECK: [[vec13:%.*]] = insertelement <17 x float> [[vec12]], float [[val13]], i64 13 + ; CHECK: [[vec14:%.*]] = insertelement <17 x float> [[vec13]], float [[val14]], i64 14 + ; CHECK: [[vec15:%.*]] = insertelement <17 x float> [[vec14]], float [[val15]], i64 15 + ; CHECK: [[vec16:%.*]] = insertelement <17 x float> [[vec15]], float [[val16]], i64 16 + %tmp35 = call %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32 303, %dx.types.Handle %tmp33, i32 %tmp34, i32 0, i32 4) + %tmp36 = extractvalue %dx.types.ResRet.v17f32 %tmp35, 0 + %tmp37 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %tmp2) + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp37, %dx.types.ResourceProperties { i32 36876, i32 68 }) + %tmp39 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %tmp38, i8 1) + + ; CHECK: 
[[val0:%.*]] = extractelement <17 x float> [[vec16]], i64 0 + ; CHECK: [[val1:%.*]] = extractelement <17 x float> [[vec16]], i64 1 + ; CHECK: [[val2:%.*]] = extractelement <17 x float> [[vec16]], i64 2 + ; CHECK: [[val3:%.*]] = extractelement <17 x float> [[vec16]], i64 3 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, float [[val0]], float [[val1]], float [[val2]], float [[val3]], i8 15, i32 4) + ; CHECK: [[val4:%.*]] = extractelement <17 x float> [[vec16]], i64 4 + ; CHECK: [[val5:%.*]] = extractelement <17 x float> [[vec16]], i64 5 + ; CHECK: [[val6:%.*]] = extractelement <17 x float> [[vec16]], i64 6 + ; CHECK: [[val7:%.*]] = extractelement <17 x float> [[vec16]], i64 7 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 16, float [[val4]], float [[val5]], float [[val6]], float [[val7]], i8 15, i32 4) + ; CHECK: [[val8:%.*]] = extractelement <17 x float> [[vec16]], i64 8 + ; CHECK: [[val9:%.*]] = extractelement <17 x float> [[vec16]], i64 9 + ; CHECK: [[val10:%.*]] = extractelement <17 x float> [[vec16]], i64 10 + ; CHECK: [[val11:%.*]] = extractelement <17 x float> [[vec16]], i64 11 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 32, float [[val8]], float [[val9]], float [[val10]], float [[val11]], i8 15, i32 4) + ; CHECK: [[val12:%.*]] = extractelement <17 x float> [[vec16]], i64 12 + ; CHECK: [[val13:%.*]] = extractelement <17 x float> [[vec16]], i64 13 + ; CHECK: [[val14:%.*]] = extractelement <17 x float> [[vec16]], i64 14 + ; CHECK: [[val15:%.*]] = extractelement <17 x float> [[vec16]], i64 15 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp38, i32 %tmp39, i32 48, float [[val12]], float [[val13]], float [[val14]], float [[val15]], i8 15, i32 4) + ; CHECK: [[val16:%.*]] = extractelement <17 x float> [[vec16]], i64 16 + ; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, 
%dx.types.Handle %tmp38, i32 %tmp39, i32 64, float [[val16]], float undef, float undef, float undef, i8 1, i32 4) + call void @dx.op.rawBufferVectorStore.v17f32(i32 304, %dx.types.Handle %tmp38, i32 %tmp39, i32 0, <17 x float> %tmp36, i32 4) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #0 +declare %dx.types.ResRet.v17f32 @dx.op.rawBufferVectorLoad.v17f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare void @dx.op.rawBufferVectorStore.v17f32(i32, %dx.types.Handle, i32, i32, <17 x float>, i32) #2 +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!13} +!dx.entryPoints = !{!17, !19} + +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{!4, !8, null, null} +!4 = !{!5, !6} +!5 = !{i32 0, %struct.ByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RoByBuf@@3UByteAddressBuffer@@A" to %struct.ByteAddressBuffer*), !"RoByBuf", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{i32 1, %"class.StructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RoStBuf@@3V?$StructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.StructuredBuffer >"*), !"RoStBuf", i32 0, i32 2, i32 1, i32 12, i32 0, !7} +!7 = !{i32 1, i32 68} +!8 = !{!9, !10, !11, !12} +!9 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?RwByBuf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"RwByBuf", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!10 = !{i32 1, %"class.RWStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?RwStBuf@@3V?$RWStructuredBuffer@V?$vector@M$0BB@@@@@A" to 
%"class.RWStructuredBuffer >"*), !"RwStBuf", i32 0, i32 2, i32 1, i32 12, i1 false, i1 false, i1 false, !7} +!11 = !{i32 2, %"class.ConsumeStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?CnStBuf@@3V?$ConsumeStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.ConsumeStructuredBuffer >"*), !"CnStBuf", i32 0, i32 4, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!12 = !{i32 3, %"class.AppendStructuredBuffer >"* bitcast (%dx.types.Handle* @"\01?ApStBuf@@3V?$AppendStructuredBuffer@V?$vector@M$0BB@@@@@A" to %"class.AppendStructuredBuffer >"*), !"ApStBuf", i32 0, i32 5, i32 1, i32 12, i1 false, i1 true, i1 false, !7} +!13 = !{i32 1, void ()* @main, !14} +!14 = !{!15} +!15 = !{i32 0, !16, !16} +!16 = !{} +!17 = !{null, !"", null, !3, !18} +!18 = !{i32 0, i64 8589934608} +!19 = !{void ()* @main, !"main", !20, null, !24} +!20 = !{!21, null, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 5, i8 0, !23, i8 0, i32 2, i8 1, i32 0, i8 0, null} +!23 = !{i32 0, i32 1} +!24 = !{i32 8, i32 1, i32 5, !25} +!25 = !{i32 0} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll new file mode 100644 index 0000000000..1fe7c17621 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-scalarizer.ll @@ -0,0 +1,660 @@ +; RUN: %dxopt %s -scalarizer -S | FileCheck %s + +; Vectors of length greather than 1 should get no changes from scalarizer, +; so this unusual test, verifies that the pass makes no changes at all. +; Still justified because prior to 6.9, many changes would result. +; Compiled mostly for float7 vectors with int7 for the integer specific parts. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer" = type { float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } + +@"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" = external global [7 x %"class.RWStructuredBuffer"], align 4 +@llvm.used = appending global [1 x i8*] [i8* bitcast ([7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A" to i8*)], section "llvm.metadata" + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?assignments +define void @"\01?assignments@@YAXY09$$CAV?$vector@M$06@@@Z"([10 x <7 x float>]* noalias %things) #0 { +bb: + %tmp = load %"class.RWStructuredBuffer", %"class.RWStructuredBuffer"* getelementptr inbounds ([7 x %"class.RWStructuredBuffer"], [7 x %"class.RWStructuredBuffer"]* @"\01?buf@@3PAV?$RWStructuredBuffer@M@@A", i32 0, i32 0) + %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32 160, %"class.RWStructuredBuffer" %tmp) + %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + + ; CHECK: [[buf:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 1, i32 0, i8 1, i32 4) + ; CHECK: [[val:%.*]] = extractvalue %dx.types.ResRet.f32 [[buf]], 0 + ; CHECK: [[vec:%.*]] = insertelement <7 x float> undef, float [[val]], i32 0 + ; CHECK: [[res0:%.*]] = shufflevector <7 x float> [[vec]], <7 x float> undef, <7 x i32> zeroinitializer + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: store <7 x float> [[res0]], <7 x float>* [[adr0]], align 4 + %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4) + %tmp3 = 
extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0 + %tmp4 = insertelement <7 x float> undef, float %tmp3, i32 0 + %tmp5 = shufflevector <7 x float> %tmp4, <7 x float> undef, <7 x i32> zeroinitializer + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[res1:%.*]] = fadd fast <7 x float> [[ld1]], [[ld5]] + ; CHECK: store <7 x float> [[res1]], <7 x float>* [[adr1]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 1 + %tmp10 = load <7 x float>, <7 x float>* %tmp9, align 4 + %tmp11 = fadd fast <7 x float> %tmp10, %tmp8 + store <7 x float> %tmp11, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fsub fast <7 x float> [[ld2]], [[ld6]] + ; CHECK: store <7 x float> [[res2]], <7 x float>* [[adr2]], align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 6 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* 
%things, i32 0, i32 2 + %tmp15 = load <7 x float>, <7 x float>* %tmp14, align 4 + %tmp16 = fsub fast <7 x float> %tmp15, %tmp13 + store <7 x float> %tmp16, <7 x float>* %tmp14, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fmul fast <7 x float> [[ld3]], [[ld7]] + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 7 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 3 + %tmp20 = load <7 x float>, <7 x float>* %tmp19, align 4 + %tmp21 = fmul fast <7 x float> %tmp20, %tmp18 + store <7 x float> %tmp21, <7 x float>* %tmp19, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld8]] + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 8 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 4 + %tmp25 = load <7 x float>, <7 x float>* %tmp24, align 4 + %tmp26 = fdiv fast <7 x float> %tmp25, %tmp23 + store <7 x float> %tmp26, <7 
x float>* %tmp24, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = frem fast <7 x float> [[ld5]], [[ld9]] + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp27 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 9 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29, align 4 + %tmp31 = frem fast <7 x float> %tmp30, %tmp28 + store <7 x float> %tmp31, <7 x float>* %tmp29, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$06@@Y0L@$$CAV1@@Z"([11 x <7 x float>]* noalias sret %agg.result, [11 x <7 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[res0:%.*]] = fsub fast <7 x float> , [[ld0]] + %tmp = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp1 = load <7 x float>, <7 x float>* %tmp, align 4 + %tmp2 = fsub fast <7 x float> , %tmp1 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + %tmp3 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 0 + %tmp4 = load <7 x float>, <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr1:%.*]] = 
getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[res2:%.*]] = fadd fast <7 x float> [[ld1]], [[ld2]] + %tmp5 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 1 + %tmp6 = load <7 x float>, <7 x float>* %tmp5, align 4 + %tmp7 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = fadd fast <7 x float> %tmp6, %tmp8 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[res3:%.*]] = fsub fast <7 x float> [[ld2]], [[ld3]] + %tmp10 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp13 = load <7 x float>, <7 x float>* %tmp12, align 4 + %tmp14 = fsub fast <7 x float> %tmp11, %tmp13 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[res4:%.*]] = fmul fast <7 x float> [[ld3]], [[ld4]] + %tmp15 = 
getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 3 + %tmp16 = load <7 x float>, <7 x float>* %tmp15, align 4 + %tmp17 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp18 = load <7 x float>, <7 x float>* %tmp17, align 4 + %tmp19 = fmul fast <7 x float> %tmp16, %tmp18 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[res5:%.*]] = fdiv fast <7 x float> [[ld4]], [[ld5]] + %tmp20 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 4 + %tmp21 = load <7 x float>, <7 x float>* %tmp20, align 4 + %tmp22 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp23 = load <7 x float>, <7 x float>* %tmp22, align 4 + %tmp24 = fdiv fast <7 x float> %tmp21, %tmp23 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = frem fast <7 x float> [[ld5]], [[ld6]] + %tmp25 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 5 + %tmp26 = load <7 x float>, <7 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 6 + %tmp28 = load <7 x float>, <7 x float>* %tmp27, align 4 + %tmp29 = frem fast <7 x float> %tmp26, %tmp28 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x 
float>], [11 x <7 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x float>, <7 x float>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = fadd fast <7 x float> [[ld7]], + ; CHECK: store <7 x float> [[res7]], <7 x float>* [[adr7]], align 4 + %tmp30 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 7 + %tmp31 = load <7 x float>, <7 x float>* %tmp30, align 4 + %tmp32 = fadd fast <7 x float> %tmp31, + store <7 x float> %tmp32, <7 x float>* %tmp30, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x float>, <7 x float>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = fadd fast <7 x float> [[ld8]], + ; CHECK: store <7 x float> [[res8]], <7 x float>* [[adr8]], align 4 + %tmp33 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 8 + %tmp34 = load <7 x float>, <7 x float>* %tmp33, align 4 + %tmp35 = fadd fast <7 x float> %tmp34, + store <7 x float> %tmp35, <7 x float>* %tmp33, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x float>, <7 x float>* [[adr9]], align 4 + ; CHECK: [[res9:%.*]] = fadd fast <7 x float> [[ld9]], + ; CHECK: store <7 x float> [[res9]], <7 x float>* [[adr9]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 9 + %tmp37 = load <7 x float>, <7 x float>* %tmp36, align 4 + %tmp38 = fadd fast <7 x float> %tmp37, + store <7 x float> %tmp38, <7 x float>* %tmp36, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x float>, <7 x float>* [[adr10]], align 4 + ; CHECK: [[res10:%.*]] = fadd fast <7 x float> [[ld10]], + ; CHECK: store <7 x float> [[res10]], <7 x float>* [[adr10]], align 4 + %tmp39 = getelementptr inbounds [11 x 
<7 x float>], [11 x <7 x float>]* %things, i32 0, i32 10 + %tmp40 = load <7 x float>, <7 x float>* %tmp39, align 4 + %tmp41 = fadd fast <7 x float> %tmp40, + store <7 x float> %tmp41, <7 x float>* %tmp39, align 4 + + %tmp42 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 0 + store <7 x float> %tmp2, <7 x float>* %tmp42 + %tmp43 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 1 + store <7 x float> %tmp4, <7 x float>* %tmp43 + %tmp44 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 2 + store <7 x float> %tmp9, <7 x float>* %tmp44 + %tmp45 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 3 + store <7 x float> %tmp14, <7 x float>* %tmp45 + %tmp46 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 4 + store <7 x float> %tmp19, <7 x float>* %tmp46 + %tmp47 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 5 + store <7 x float> %tmp24, <7 x float>* %tmp47 + %tmp48 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 6 + store <7 x float> %tmp29, <7 x float>* %tmp48 + %tmp49 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 7 + store <7 x float> %tmp31, <7 x float>* %tmp49 + %tmp50 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 8 + store <7 x float> %tmp34, <7 x float>* %tmp50 + %tmp51 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 9 + store <7 x float> %tmp38, <7 x float>* %tmp51 + %tmp52 = getelementptr inbounds [11 x <7 x float>], [11 x <7 x float>]* %agg.result, i32 0, i32 10 + store <7 x float> %tmp41, <7 x float>* %tmp52 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?logic +define void 
@"\01?logic@@YA$$BY09V?$vector@_N$06@@Y09V1@Y09V?$vector@M$06@@@Z"([10 x <7 x i32>]* noalias sret %agg.result, [10 x <7 x i32>]* %truth, [10 x <7 x float>]* %consequences) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x i32>, <7 x i32>* [[adr0]], align 4 + ; CHECK: [[nres0:%.*]] = icmp ne <7 x i32> [[ld0]], zeroinitializer + ; CHECK: [[bres0:%.*]] = icmp eq <7 x i1> [[nres0:%.*]], zeroinitializer + ; CHECK: [[res0:%.*]] = zext <7 x i1> [[bres0]] to <7 x i32> + %tmp = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 0 + %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4 + %tmp2 = icmp ne <7 x i32> %tmp1, zeroinitializer + %tmp3 = icmp eq <7 x i1> %tmp2, zeroinitializer + %tmp4 = zext <7 x i1> %tmp3 to <7 x i32> + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4 + ; CHECK: [[bld1:%.*]] = icmp ne <7 x i32> [[ld1]], zeroinitializer + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer + ; CHECK: [[bres1:%.*]] = or <7 x i1> [[bld1]], [[bld2]] + ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32> + %tmp5 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 1 + %tmp6 = load <7 x i32>, <7 x i32>* %tmp5, align 4 + %tmp7 = icmp ne <7 x i32> %tmp6, zeroinitializer + %tmp8 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + %tmp9 = load <7 x i32>, <7 x i32>* %tmp8, align 4 + %tmp10 = icmp ne <7 x i32> %tmp9, zeroinitializer + %tmp11 = or <7 x i1> %tmp7, %tmp10 + %tmp12 = zext <7 x i1> %tmp11 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x 
i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[bld2:%.*]] = icmp ne <7 x i32> [[ld2]], zeroinitializer + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[bres2:%.*]] = and <7 x i1> [[bld2]], [[bld3]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp13 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 2 + %tmp14 = load <7 x i32>, <7 x i32>* %tmp13, align 4 + %tmp15 = icmp ne <7 x i32> %tmp14, zeroinitializer + %tmp16 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = icmp ne <7 x i32> %tmp17, zeroinitializer + %tmp19 = and <7 x i1> %tmp15, %tmp18 + %tmp20 = zext <7 x i1> %tmp19 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[bld3:%.*]] = icmp ne <7 x i32> [[ld3]], zeroinitializer + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[bld4:%.*]] = icmp ne <7 x i32> [[ld4]], zeroinitializer + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[bld5:%.*]] = icmp ne <7 x i32> [[ld5]], zeroinitializer + ; CHECK: [[bres3:%.*]] = select <7 x i1> [[bld3]], <7 x i1> [[bld4]], <7 x i1> [[bld5]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp21 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, 
i32 0, i32 3 + %tmp22 = load <7 x i32>, <7 x i32>* %tmp21, align 4 + %tmp23 = icmp ne <7 x i32> %tmp22, zeroinitializer + %tmp24 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 4 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = icmp ne <7 x i32> %tmp25, zeroinitializer + %tmp27 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %truth, i32 0, i32 5 + %tmp28 = load <7 x i32>, <7 x i32>* %tmp27, align 4 + %tmp29 = icmp ne <7 x i32> %tmp28, zeroinitializer + %tmp30 = select <7 x i1> %tmp23, <7 x i1> %tmp26, <7 x i1> %tmp29 + %tmp31 = zext <7 x i1> %tmp30 to <7 x i32> + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[bres1:%.*]] = fcmp fast oeq <7 x float> [[ld0]], [[ld1]] + ; CHECK: [[res1:%.*]] = zext <7 x i1> [[bres1]] to <7 x i32> + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 0 + %tmp33 = load <7 x float>, <7 x float>* %tmp32, align 4 + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + %tmp35 = load <7 x float>, <7 x float>* %tmp34, align 4 + %tmp36 = fcmp fast oeq <7 x float> %tmp33, %tmp35 + %tmp37 = zext <7 x i1> %tmp36 to <7 x i32> + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x float>, <7 x float>* [[adr1]], align 4 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[bres2:%.*]] = fcmp fast une <7 x float> 
[[ld1]], [[ld2]] + ; CHECK: [[res2:%.*]] = zext <7 x i1> [[bres2]] to <7 x i32> + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 1 + %tmp39 = load <7 x float>, <7 x float>* %tmp38, align 4 + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp41 = load <7 x float>, <7 x float>* %tmp40, align 4 + %tmp42 = fcmp fast une <7 x float> %tmp39, %tmp41 + %tmp43 = zext <7 x i1> %tmp42 to <7 x i32> + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[bres3:%.*]] = fcmp fast olt <7 x float> [[ld2]], [[ld3]] + ; CHECK: [[res3:%.*]] = zext <7 x i1> [[bres3]] to <7 x i32> + %tmp44 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 2 + %tmp45 = load <7 x float>, <7 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp47 = load <7 x float>, <7 x float>* %tmp46, align 4 + %tmp48 = fcmp fast olt <7 x float> %tmp45, %tmp47 + %tmp49 = zext <7 x i1> %tmp48 to <7 x i32> + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x float>, <7 x float>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[bres4:%.*]] = fcmp fast ogt <7 x float> [[ld3]], [[ld4]] + ; CHECK: [[res4:%.*]] = zext <7 x i1> [[bres4]] to <7 x i32> + %tmp50 = getelementptr inbounds [10 x <7 x 
float>], [10 x <7 x float>]* %consequences, i32 0, i32 3 + %tmp51 = load <7 x float>, <7 x float>* %tmp50, align 4 + %tmp52 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + %tmp53 = load <7 x float>, <7 x float>* %tmp52, align 4 + %tmp54 = fcmp fast ogt <7 x float> %tmp51, %tmp53 + %tmp55 = zext <7 x i1> %tmp54 to <7 x i32> + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x float>, <7 x float>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[bres5:%.*]] = fcmp fast ole <7 x float> [[ld4]], [[ld5]] + ; CHECK: [[res5:%.*]] = zext <7 x i1> [[bres5]] to <7 x i32> + %tmp56 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 4 + %tmp57 = load <7 x float>, <7 x float>* %tmp56, align 4 + %tmp58 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp59 = load <7 x float>, <7 x float>* %tmp58, align 4 + %tmp60 = fcmp fast ole <7 x float> %tmp57, %tmp59 + %tmp61 = zext <7 x i1> %tmp60 to <7 x i32> + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x float>, <7 x float>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x float>, <7 x float>* [[adr6]], align 4 + ; CHECK: [[bres6:%.*]] = fcmp fast oge <7 x float> [[ld5]], [[ld6]] + ; CHECK: [[res6:%.*]] = zext <7 x i1> [[bres6]] to <7 x i32> + %tmp62 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 5 + %tmp63 = load <7 x float>, <7 x float>* %tmp62, align 4 + %tmp64 = 
getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %consequences, i32 0, i32 6 + %tmp65 = load <7 x float>, <7 x float>* %tmp64, align 4 + %tmp66 = fcmp fast oge <7 x float> %tmp63, %tmp65 + %tmp67 = zext <7 x i1> %tmp66 to <7 x i32> + + %tmp68 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 0 + store <7 x i32> %tmp4, <7 x i32>* %tmp68 + %tmp69 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 1 + store <7 x i32> %tmp12, <7 x i32>* %tmp69 + %tmp70 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 2 + store <7 x i32> %tmp20, <7 x i32>* %tmp70 + %tmp71 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 3 + store <7 x i32> %tmp31, <7 x i32>* %tmp71 + %tmp72 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 4 + store <7 x i32> %tmp37, <7 x i32>* %tmp72 + %tmp73 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 5 + store <7 x i32> %tmp43, <7 x i32>* %tmp73 + %tmp74 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 6 + store <7 x i32> %tmp49, <7 x i32>* %tmp74 + %tmp75 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 7 + store <7 x i32> %tmp55, <7 x i32>* %tmp75 + %tmp76 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 8 + store <7 x i32> %tmp61, <7 x i32>* %tmp76 + %tmp77 = getelementptr inbounds [10 x <7 x i32>], [10 x <7 x i32>]* %agg.result, i32 0, i32 9 + store <7 x i32> %tmp67, <7 x i32>* %tmp77 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$06@@Y09V1@H@Z"([10 x <7 x float>]* noalias sret %agg.result, [10 x <7 x float>]* %things, i32 %i) #0 { +bb: + %res = alloca [10 x <7 x float>], align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], 
[10 x <7 x float>]* %res, i32 0, i32 0 + ; CHECK: store <7 x float> zeroinitializer, <7 x float>* [[adr0]], align 4 + %tmp1 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + store <7 x float> zeroinitializer, <7 x float>* %tmp1, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i + ; CHECK: store <7 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <7 x float>* [[adri]], align 4 + %tmp2 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 %i + store <7 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <7 x float>* %tmp2, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + ; CHECK: store <7 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <7 x float>* [[adr2]], align 4 + %tmp3 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + store <7 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <7 x float>* %tmp3, align 4 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res3:%.*]] = load <7 x float>, <7 x float>* [[adr0]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + ; CHECK: store <7 x float> [[res3]], <7 x float>* [[adr3]], align 4 + %tmp4 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 0 + %tmp5 = load <7 x float>, <7 x float>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + store <7 x float> %tmp5, <7 x float>* %tmp6, align 4 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 %i + ; CHECK: [[res4:%.*]] = load <7 x float>, <7 x float>* [[adri]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + ; CHECK: store <7 x float> [[res4]], <7 x float>* [[adr4]], align 4 + %tmp7 = getelementptr inbounds [10 x <7 x
float>]* %things, i32 0, i32 %i + %tmp8 = load <7 x float>, <7 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + store <7 x float> %tmp8, <7 x float>* %tmp9, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + ; CHECK: [[res5:%.*]] = load <7 x float>, <7 x float>* [[adr2]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + ; CHECK: store <7 x float> [[res5]], <7 x float>* [[adr5]], align 4 + %tmp10 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %things, i32 0, i32 2 + %tmp11 = load <7 x float>, <7 x float>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + store <7 x float> %tmp11, <7 x float>* %tmp12, align 4 + + %tmp13 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 0 + %tmp14 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 0 + %tmp15 = load <7 x float>, <7 x float>* %tmp14 + store <7 x float> %tmp15, <7 x float>* %tmp13 + + %tmp16 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 1 + %tmp17 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 1 + %tmp18 = load <7 x float>, <7 x float>* %tmp17 + store <7 x float> %tmp18, <7 x float>* %tmp16 + + %tmp19 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 2 + %tmp20 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 2 + %tmp21 = load <7 x float>, <7 x float>* %tmp20 + store <7 x float> %tmp21, <7 x float>* %tmp19 + + %tmp22 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 3 + %tmp24 = load <7 x 
float>, <7 x float>* %tmp23 + store <7 x float> %tmp24, <7 x float>* %tmp22 + + %tmp25 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 4 + %tmp26 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 4 + %tmp27 = load <7 x float>, <7 x float>* %tmp26 + store <7 x float> %tmp27, <7 x float>* %tmp25 + + %tmp28 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 5 + %tmp29 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 5 + %tmp30 = load <7 x float>, <7 x float>* %tmp29 + store <7 x float> %tmp30, <7 x float>* %tmp28 + + %tmp31 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 6 + %tmp32 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 6 + %tmp33 = load <7 x float>, <7 x float>* %tmp32 + store <7 x float> %tmp33, <7 x float>* %tmp31 + + %tmp34 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 7 + %tmp35 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 7 + %tmp36 = load <7 x float>, <7 x float>* %tmp35 + store <7 x float> %tmp36, <7 x float>* %tmp34 + + %tmp37 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 8 + %tmp38 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 8 + %tmp39 = load <7 x float>, <7 x float>* %tmp38 + store <7 x float> %tmp39, <7 x float>* %tmp37 + + %tmp40 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %agg.result, i32 0, i32 9 + %tmp41 = getelementptr inbounds [10 x <7 x float>], [10 x <7 x float>]* %res, i32 0, i32 9 + %tmp42 = load <7 x float>, <7 x float>* %tmp41 + store <7 x float> %tmp42, <7 x float>* %tmp40 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAV?$vector@I$06@@@Z"([11 x <7 x 
i32>]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <7 x i32>, <7 x i32>* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor <7 x i32> [[ld1]], + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + ; CHECK: store <7 x i32> [[res0]], <7 x i32>* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + %tmp1 = load <7 x i32>, <7 x i32>* %tmp, align 4 + %tmp2 = xor <7 x i32> %tmp1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %tmp3 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 0 + store <7 x i32> %tmp2, <7 x i32>* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <7 x i32>, <7 x i32>* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or <7 x i32> [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + ; CHECK: store <7 x i32> [[res1]], <7 x i32>* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + %tmp5 = load <7 x i32>, <7 x i32>* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp7 = load <7 x i32>, <7 x i32>* %tmp6, align 4 + %tmp8 = or <7 x i32> %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 1 + store <7 x i32> %tmp8, <7 x i32>* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <7 x i32>, <7 x i32>* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]]
= getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and <7 x i32> [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + ; CHECK: store <7 x i32> [[res2]], <7 x i32>* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + %tmp11 = load <7 x i32>, <7 x i32>* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp13 = load <7 x i32>, <7 x i32>* %tmp12, align 4 + %tmp14 = and <7 x i32> %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 2 + store <7 x i32> %tmp14, <7 x i32>* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <7 x i32>, <7 x i32>* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor <7 x i32> [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + ; CHECK: store <7 x i32> [[res3]], <7 x i32>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + %tmp17 = load <7 x i32>, <7 x i32>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp19 = load <7 x i32>, <7 x i32>* %tmp18, align 4 + %tmp20 = xor <7 x i32> %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 3 + store <7 x i32> %tmp20, <7 x i32>* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x 
<7 x i32>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <7 x i32>, <7 x i32>* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[shv6:%.*]] = and <7 x i32> [[ld6]], + ; CHECK: [[res4:%.*]] = shl <7 x i32> [[ld5]], [[shv6]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + ; CHECK: store <7 x i32> [[res4]], <7 x i32>* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + %tmp23 = load <7 x i32>, <7 x i32>* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp25 = load <7 x i32>, <7 x i32>* %tmp24, align 4 + %tmp26 = and <7 x i32> %tmp25, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + %tmp27 = shl <7 x i32> %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 4 + store <7 x i32> %tmp27, <7 x i32>* %tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[shv7:%.*]] = and <7 x i32> [[ld7]], + ; CHECK: [[res5:%.*]] = lshr <7 x i32> [[ld6]], [[shv7]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + ; CHECK: store <7 x i32> [[res5]], <7 x i32>* [[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp30 = load <7 x i32>, <7 x i32>* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp32 = load <7 x i32>, <7 x i32>* %tmp31, align 4
+ %tmp33 = and <7 x i32> %tmp32, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + %tmp34 = lshr <7 x i32> %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 5 + store <7 x i32> %tmp34, <7 x i32>* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <7 x i32>, <7 x i32>* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or <7 x i32> [[ld6]], [[ld8]] + ; CHECK: store <7 x i32> [[res6]], <7 x i32>* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp37 = load <7 x i32>, <7 x i32>* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 6 + %tmp39 = load <7 x i32>, <7 x i32>* %tmp38, align 4 + %tmp40 = or <7 x i32> %tmp39, %tmp37 + store <7 x i32> %tmp40, <7 x i32>* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <7 x i32>, <7 x i32>* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <7 x i32>, <7 x i32>* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and <7 x i32> [[ld7]], [[ld9]] + ; CHECK: store <7 x i32> [[res7]], <7 x i32>* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 9 + %tmp42 = load <7 x i32>, <7 x i32>* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 7 + %tmp44 = load <7 x i32>, <7 x i32>* %tmp43, align 4 + %tmp45 = and <7 x i32> %tmp44, %tmp42 + store <7 x i32> %tmp45, <7 x i32>* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds
[11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + ; CHECK: [[ld10:%.*]] = load <7 x i32>, <7 x i32>* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <7 x i32>, <7 x i32>* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor <7 x i32> [[ld8]], [[ld10]] + ; CHECK: store <7 x i32> [[res8]], <7 x i32>* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 10 + %tmp47 = load <7 x i32>, <7 x i32>* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x <7 x i32>], [11 x <7 x i32>]* %things, i32 0, i32 8 + %tmp49 = load <7 x i32>, <7 x i32>* %tmp48, align 4 + %tmp50 = xor <7 x i32> %tmp49, %tmp47 + store <7 x i32> %tmp50, <7 x i32>* %tmp48, align 4 + + ret void +} + +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer"(i32, %"class.RWStructuredBuffer") #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!3} + +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll new file mode 100644 index 0000000000..9734b85b12 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1-scalarizer.ll @@ -0,0 +1,745 @@ +; RUN: %dxopt %s -scalarizer -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <1 x float> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, 
float, float, float, i32 } + +@"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@llvm.used = appending global [1 x i8*] [i8* bitcast (%"class.RWStructuredBuffer >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" to i8*)], section "llvm.metadata" + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?assignments +define void @"\01?assignments@@YAXY09$$CAV?$vector@M$00@@@Z"([10 x <1 x float>]* noalias %things) #0 { +bb: + %tmp = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?buf@@3V?$RWStructuredBuffer@V?$vector@M$00@@@@A" + %tmp1 = call %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32 160, %"class.RWStructuredBuffer >" %tmp) + %tmp2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + %RawBufferLoad = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tmp2, i32 1, i32 0, i8 1, i32 4) + %tmp3 = extractvalue %dx.types.ResRet.f32 %RawBufferLoad, 0 + %tmp4 = insertelement <1 x float> undef, float %tmp3, i64 0 + %tmp5 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + store <1 x float> %tmp4, <1 x float>* %tmp5, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[res1:%.*]] = fadd fast float [[val1]], [[val5]] + ; CHECK: [[vec1:%.*]] = insertelement <1 x float> undef, float [[res1]], i32 0 + ; CHECK: store <1 x float> [[vec1]], <1 x float>* [[adr1]], 
align 4 + %tmp6 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + %tmp7 = load <1 x float>, <1 x float>* %tmp6, align 4 + %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 1 + %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4 + %tmp10 = fadd fast <1 x float> %tmp9, %tmp7 + store <1 x float> %tmp10, <1 x float>* %tmp8, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[res2:%.*]] = fsub fast float [[val2]], [[val6]] + ; CHECK: [[vec2:%.*]] = insertelement <1 x float> undef, float [[res2]], i32 0 + ; CHECK: store <1 x float> [[vec2]], <1 x float>* [[adr2]], align 4 + %tmp11 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 6 + %tmp12 = load <1 x float>, <1 x float>* %tmp11, align 4 + %tmp13 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = fsub fast <1 x float> %tmp14, %tmp12 + store <1 x float> %tmp15, <1 x float>* %tmp13, align 4 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]] + ; CHECK: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x 
float> [[ld3]], i32 0 + ; CHECK: [[res3:%.*]] = fmul fast float [[val3]], [[val7]] + ; CHECK: [[vec3:%.*]] = insertelement <1 x float> undef, float [[res3]], i32 0 + ; CHECK: store <1 x float> [[vec3]], <1 x float>* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 7 + %tmp17 = load <1 x float>, <1 x float>* %tmp16, align 4 + %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 3 + %tmp19 = load <1 x float>, <1 x float>* %tmp18, align 4 + %tmp20 = fmul fast <1 x float> %tmp19, %tmp17 + store <1 x float> %tmp20, <1 x float>* %tmp18, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]] + ; CHECK: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[res4:%.*]] = fdiv fast float [[val4]], [[val8]] + ; CHECK: [[vec4:%.*]] = insertelement <1 x float> undef, float [[res4]], i32 0 + ; CHECK: store <1 x float> [[vec4]], <1 x float>* [[adr4]], align 4 + %tmp21 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 8 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 4 + %tmp24 = load <1 x float>, <1 x float>* %tmp23, align 4 + %tmp25 = fdiv fast <1 x float> %tmp24, %tmp22 + store <1 x float> %tmp25, <1 x float>* %tmp23, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]] + ; CHECK: [[val9:%.*]] = extractelement <1 x float> 
[[ld9]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[res5:%.*]] = frem fast float [[val5]], [[val9]] + ; CHECK: [[vec5:%.*]] = insertelement <1 x float> undef, float [[res5]], i32 0 + ; CHECK: store <1 x float> [[vec5]], <1 x float>* [[adr5]], align 4 + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 9 + %tmp27 = load <1 x float>, <1 x float>* %tmp26, align 4 + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 5 + %tmp29 = load <1 x float>, <1 x float>* %tmp28, align 4 + %tmp30 = frem fast <1 x float> %tmp29, %tmp27 + store <1 x float> %tmp30, <1 x float>* %tmp28, align 4 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?arithmetic +define void @"\01?arithmetic@@YA$$BY0L@V?$vector@M$00@@Y0L@$$CAV1@@Z"([11 x <1 x float>]* noalias sret %agg.result, [11 x <1 x float>]* noalias %things) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]], align 4 + ; CHECK-DAG: [[zero:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK-DAG: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[sub0:%.*]] = fsub fast float [[zero]], [[val0]] + ; CHECK: [[res0:%.*]] = insertelement <1 x float> undef, float [[sub0]], i32 0 + %tmp = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp1 = load <1 x float>, <1 x float>* %tmp, align 4 + %tmp2 = fsub fast <1 x float> , %tmp1 + %tmp3 = extractelement <1 x float> %tmp2, i64 0 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[res1:%.*]] = load <1 x float>, 
<1 x float>* [[adr0]], align 4 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[res1]], i64 0 + %tmp4 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = extractelement <1 x float> %tmp5, i64 0 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]], align 4 + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[add1:%.*]] = fadd fast float [[val1]], [[val2]] + ; CHECK: [[res1:%.*]] = insertelement <1 x float> undef, float [[add1]], i32 0 + %tmp7 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 1 + %tmp8 = load <1 x float>, <1 x float>* %tmp7, align 4 + %tmp9 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp10 = load <1 x float>, <1 x float>* %tmp9, align 4 + %tmp11 = fadd fast <1 x float> %tmp8, %tmp10 + %tmp12 = extractelement <1 x float> %tmp11, i64 0 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]], align 4 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[sub2:%.*]] = fsub fast float [[val2]], [[val3]] + ; CHECK: [[res2:%.*]] = insertelement <1 x float> undef, float [[sub2]], 
i32 0 + %tmp13 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 2 + %tmp14 = load <1 x float>, <1 x float>* %tmp13, align 4 + %tmp15 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp16 = load <1 x float>, <1 x float>* %tmp15, align 4 + %tmp17 = fsub fast <1 x float> %tmp14, %tmp16 + %tmp18 = extractelement <1 x float> %tmp17, i64 0 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]], align 4 + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[mul3:%.*]] = fmul fast float [[val3]], [[val4]] + ; CHECK: [[res3:%.*]] = insertelement <1 x float> undef, float [[mul3]], i32 0 + %tmp19 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 3 + %tmp20 = load <1 x float>, <1 x float>* %tmp19, align 4 + %tmp21 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp22 = load <1 x float>, <1 x float>* %tmp21, align 4 + %tmp23 = fmul fast <1 x float> %tmp20, %tmp22 + %tmp24 = extractelement <1 x float> %tmp23, i64 0 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]], align 4 + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: 
[[div4:%.*]] = fdiv fast float [[val4]], [[val5]] + ; CHECK: [[res4:%.*]] = insertelement <1 x float> undef, float [[div4]], i32 0 + %tmp25 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 4 + %tmp26 = load <1 x float>, <1 x float>* %tmp25, align 4 + %tmp27 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp28 = load <1 x float>, <1 x float>* %tmp27, align 4 + %tmp29 = fdiv fast <1 x float> %tmp26, %tmp28 + %tmp30 = extractelement <1 x float> %tmp29, i64 0 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]], align 4 + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]], align 4 + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[rem5:%.*]] = frem fast float [[val5]], [[val6]] + ; CHECK: [[res5:%.*]] = insertelement <1 x float> undef, float [[rem5]], i32 0 + %tmp31 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 5 + %tmp32 = load <1 x float>, <1 x float>* %tmp31, align 4 + %tmp33 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 6 + %tmp34 = load <1 x float>, <1 x float>* %tmp33, align 4 + %tmp35 = frem fast <1 x float> %tmp32, %tmp34 + %tmp36 = extractelement <1 x float> %tmp35, i64 0 + + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load <1 x float>, <1 x float>* [[adr7]], align 4 + ; CHECK-DAG: [[val7:%.*]] = extractelement <1 x float> [[ld7]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add6:%.*]] = fadd fast float [[val7]], [[pos1]] + ; CHECK: 
[[res6:%.*]] = insertelement <1 x float> undef, float [[add6]], i32 0 + %tmp37 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 7 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = fadd fast <1 x float> %tmp38, + store <1 x float> %tmp39, <1 x float>* %tmp37, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load <1 x float>, <1 x float>* [[adr8]], align 4 + ; CHECK-DAG: [[val8:%.*]] = extractelement <1 x float> [[ld8]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add7:%.*]] = fadd fast float [[val8]], [[neg1]] + ; CHECK: [[res7:%.*]] = insertelement <1 x float> undef, float [[add7]], i32 0 + %tmp40 = extractelement <1 x float> %tmp38, i64 0 + %tmp41 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 8 + %tmp42 = load <1 x float>, <1 x float>* %tmp41, align 4 + %tmp43 = fadd fast <1 x float> %tmp42, + store <1 x float> %tmp43, <1 x float>* %tmp41, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load <1 x float>, <1 x float>* [[adr9]], align 4 + ; CHECK-DAG: [[val9:%.*]] = extractelement <1 x float> [[ld9]], i32 0 + ; CHECK-DAG: [[pos1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add8:%.*]] = fadd fast float [[val9]], [[pos1]] + ; CHECK: [[res8:%.*]] = insertelement <1 x float> undef, float [[add8]], i32 0 + %tmp44 = extractelement <1 x float> %tmp42, i64 0 + %tmp45 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 9 + %tmp46 = load <1 x float>, <1 x float>* %tmp45, align 4 + %tmp47 = fadd fast <1 x float> %tmp46, + store <1 x float> %tmp47, <1 x float>* %tmp45, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + ; CHECK: 
[[ld10:%.*]] = load <1 x float>, <1 x float>* [[adr10]], align 4 + ; CHECK-DAG: [[val10:%.*]] = extractelement <1 x float> [[ld10]], i32 0 + ; CHECK-DAG: [[neg1:%.*]] = extractelement <1 x float> , i32 0 + ; CHECK: [[add9:%.*]] = fadd fast float [[val10]], [[neg1]] + ; CHECK: [[res9:%.*]] = insertelement <1 x float> undef, float [[add9]], i32 0 + %tmp48 = extractelement <1 x float> %tmp47, i64 0 + %tmp49 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %things, i32 0, i32 10 + %tmp50 = load <1 x float>, <1 x float>* %tmp49, align 4 + %tmp51 = fadd fast <1 x float> %tmp50, + store <1 x float> %tmp51, <1 x float>* %tmp49, align 4 + + %tmp52 = extractelement <1 x float> %tmp51, i64 0 + %tmp53 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 0 + %insert20 = insertelement <1 x float> undef, float %tmp3, i64 0 + store <1 x float> %insert20, <1 x float>* %tmp53 + %tmp54 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 1 + %insert18 = insertelement <1 x float> undef, float %tmp6, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp54 + %tmp55 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 2 + %insert16 = insertelement <1 x float> undef, float %tmp12, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp55 + %tmp56 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 3 + %insert14 = insertelement <1 x float> undef, float %tmp18, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp56 + %tmp57 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 4 + %insert12 = insertelement <1 x float> undef, float %tmp24, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp57 + %tmp58 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 5 + %insert10 = insertelement <1 x float> undef, float %tmp30, i64 0 + store <1 x float> %insert10, <1 x 
float>* %tmp58 + %tmp59 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 6 + %insert8 = insertelement <1 x float> undef, float %tmp36, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp59 + %tmp60 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 7 + %insert6 = insertelement <1 x float> undef, float %tmp40, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp60 + %tmp61 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 8 + %insert4 = insertelement <1 x float> undef, float %tmp44, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp61 + %tmp62 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 9 + %insert2 = insertelement <1 x float> undef, float %tmp48, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp62 + %tmp63 = getelementptr inbounds [11 x <1 x float>], [11 x <1 x float>]* %agg.result, i32 0, i32 10 + %insert = insertelement <1 x float> undef, float %tmp52, i64 0 + store <1 x float> %insert, <1 x float>* %tmp63 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?logic +define void @"\01?logic@@YA$$BY09_NY09_NY09V?$vector@M$00@@@Z"([10 x i32]* noalias sret %agg.result, [10 x i32]* %truth, [10 x <1 x float>]* %consequences) #0 { +bb: + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load i32, i32* [[adr0]], align 4 + ; CHECK: [[cmp0:%.*]] = icmp ne i32 [[ld0]], 0 + ; CHECK: [[bres0:%.*]] = xor i1 [[cmp0]], true + ; CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + %tmp = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = icmp ne i32 %tmp1, 0 + %tmp3 = xor i1 %tmp2, true + %tmp4 = zext i1 %tmp3 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], 
align 4 + ; CHECK: [[cmp1:%.*]] = icmp ne i32 [[ld1]], 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[bres1:%.*]] = or i1 [[cmp1]], [[cmp2]] + ; CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + %tmp5 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + %tmp6 = load i32, i32* %tmp5, align 4 + %tmp7 = icmp ne i32 %tmp6, 0 + %tmp9 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp10 = load i32, i32* %tmp9, align 4 + %tmp11 = icmp ne i32 %tmp10, 0 + %tmp13 = or i1 %tmp7, %tmp11 + %tmp14 = zext i1 %tmp13 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[cmp2:%.*]] = icmp ne i32 [[ld2]], 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[bres2:%.*]] = and i1 [[cmp2]], [[cmp3]] + ; CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + %tmp15 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = icmp ne i32 %tmp16, 0 + %tmp19 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp20 = load i32, i32* %tmp19, align 4 + %tmp21 = icmp ne i32 %tmp20, 0 + %tmp23 = and i1 %tmp17, %tmp21 + %tmp24 = zext i1 %tmp23 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[cmp3:%.*]] = icmp ne i32 [[ld3]], 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[cmp4:%.*]] = icmp ne 
i32 [[ld4]], 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[cmp5:%.*]] = icmp ne i32 [[ld5]], 0 + ; CHECK: [[bres3:%.*]] = select i1 [[cmp3]], i1 [[cmp4]], i1 [[cmp5]] + ; CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + %tmp25 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + %tmp26 = load i32, i32* %tmp25, align 4 + %tmp27 = icmp ne i32 %tmp26, 0 + %tmp29 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = icmp ne i32 %tmp30, 0 + %tmp32 = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + %tmp33 = load i32, i32* %tmp32, align 4 + %tmp34 = icmp ne i32 %tmp33, 0 + %tmp35 = select i1 %tmp27, i1 %tmp31, i1 %tmp34 + %tmp36 = zext i1 %tmp35 to i32 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i32 0 + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[bres4:%.*]] = fcmp fast oeq float [[val0]], [[val1]] + ; CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + %tmp37 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 0 + %tmp38 = load <1 x float>, <1 x float>* %tmp37, align 4 + %tmp39 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp40 = load <1 x float>, <1 x float>* %tmp39, align 4 + %tmp41 = fcmp fast oeq <1 x float> %tmp38, %tmp40 + %tmp42 = extractelement <1 x i1> %tmp41, i64 0 + %tmp43 = zext i1 %tmp42 to i32 + + ; CHECK: [[adr1:%.*]] = getelementptr 
inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load <1 x float>, <1 x float>* [[adr1]] + ; CHECK: [[val1:%.*]] = extractelement <1 x float> [[ld1]], i32 0 + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[bres5:%.*]] = fcmp fast une float [[val1]], [[val2]] + ; CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + %tmp44 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 1 + %tmp45 = load <1 x float>, <1 x float>* %tmp44, align 4 + %tmp46 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp47 = load <1 x float>, <1 x float>* %tmp46, align 4 + %tmp48 = fcmp fast une <1 x float> %tmp45, %tmp47 + %tmp49 = extractelement <1 x i1> %tmp48, i64 0 + %tmp50 = zext i1 %tmp49 to i32 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i32 0 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[bres6:%.*]] = fcmp fast olt float [[val2]], [[val3]] + ; CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + %tmp51 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 2 + %tmp52 = load <1 x float>, <1 x float>* %tmp51, align 4 + %tmp53 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp54 = load <1 x float>, <1 x float>* %tmp53, align 4 + %tmp55 = fcmp fast olt <1 x 
float> %tmp52, %tmp54 + %tmp56 = extractelement <1 x i1> %tmp55, i64 0 + %tmp57 = zext i1 %tmp56 to i32 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load <1 x float>, <1 x float>* [[adr3]] + ; CHECK: [[val3:%.*]] = extractelement <1 x float> [[ld3]], i32 0 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[bres7:%.*]] = fcmp fast ogt float [[val3]], [[val4]] + ; CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + %tmp58 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 3 + %tmp59 = load <1 x float>, <1 x float>* %tmp58, align 4 + %tmp60 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp61 = load <1 x float>, <1 x float>* %tmp60, align 4 + %tmp62 = fcmp fast ogt <1 x float> %tmp59, %tmp61 + %tmp63 = extractelement <1 x i1> %tmp62, i64 0 + %tmp64 = zext i1 %tmp63 to i32 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load <1 x float>, <1 x float>* [[adr4]] + ; CHECK: [[val4:%.*]] = extractelement <1 x float> [[ld4]], i32 0 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[bres8:%.*]] = fcmp fast ole float [[val4]], [[val5]] + ; CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32 + %tmp65 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 4 + %tmp66 = load <1 x float>, <1 x float>* %tmp65, align 4 + %tmp67 = getelementptr inbounds [10 x <1 
x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp68 = load <1 x float>, <1 x float>* %tmp67, align 4 + %tmp69 = fcmp fast ole <1 x float> %tmp66, %tmp68 + %tmp70 = extractelement <1 x i1> %tmp69, i64 0 + %tmp71 = zext i1 %tmp70 to i32 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load <1 x float>, <1 x float>* [[adr5]] + ; CHECK: [[val5:%.*]] = extractelement <1 x float> [[ld5]], i32 0 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load <1 x float>, <1 x float>* [[adr6]] + ; CHECK: [[val6:%.*]] = extractelement <1 x float> [[ld6]], i32 0 + ; CHECK: [[bres9:%.*]] = fcmp fast oge float [[val5]], [[val6]] + ; CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + %tmp72 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 5 + %tmp73 = load <1 x float>, <1 x float>* %tmp72, align 4 + %tmp74 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %consequences, i32 0, i32 6 + %tmp75 = load <1 x float>, <1 x float>* %tmp74, align 4 + %tmp76 = fcmp fast oge <1 x float> %tmp73, %tmp75 + %tmp77 = extractelement <1 x i1> %tmp76, i64 0 + %tmp78 = zext i1 %tmp77 to i32 + + %tmp79 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 0 + store i32 %tmp4, i32* %tmp79 + %tmp80 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 1 + store i32 %tmp14, i32* %tmp80 + %tmp81 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 2 + store i32 %tmp24, i32* %tmp81 + %tmp82 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 3 + store i32 %tmp36, i32* %tmp82 + %tmp83 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 4 + store i32 %tmp43, i32* %tmp83 + %tmp84 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 5 + store 
i32 %tmp50, i32* %tmp84 + %tmp85 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 6 + store i32 %tmp57, i32* %tmp85 + %tmp86 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 7 + store i32 %tmp64, i32* %tmp86 + %tmp87 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 8 + store i32 %tmp71, i32* %tmp87 + %tmp88 = getelementptr inbounds [10 x i32], [10 x i32]* %agg.result, i32 0, i32 9 + store i32 %tmp78, i32* %tmp88 + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?index +define void @"\01?index@@YA$$BY09V?$vector@M$00@@Y09V1@H@Z"([10 x <1 x float>]* noalias sret %agg.result, [10 x <1 x float>]* %things, i32 %i) #0 { +bb: + ; CHECK: %res.0 = alloca [10 x float] + %res.0 = alloca [10 x float] + + ; CHECK: [[adr0:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + ; CHECK: store float 0.000000e+00, float* [[adr0]] + %tmp1 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + store float 0.000000e+00, float* %tmp1 + + ; CHECK: [[adri:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + ; CHECK: store float 1.000000e+00, float* [[adri]] + %tmp2 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 %i + store float 1.000000e+00, float* %tmp2 + + ; CHECK: [[adr2:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + ; CHECK: store float 2.000000e+00, float* [[adr2]] + %tmp3 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + store float 2.000000e+00, float* %tmp3 + + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + ; CHECK: [[ld0:%.*]] = load <1 x float>, <1 x float>* [[adr0]] + ; CHECK: [[adr3:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + ; CHECK: [[val0:%.*]] = extractelement <1 x float> [[ld0]], i64 0 + ; CHECK: store float [[val0]], float* [[adr3]] + %tmp4 = getelementptr inbounds [10 x <1 
x float>], [10 x <1 x float>]* %things, i32 0, i32 0 + %tmp5 = load <1 x float>, <1 x float>* %tmp4, align 4 + %tmp6 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %tmp7 = extractelement <1 x float> %tmp5, i64 0 + store float %tmp7, float* %tmp6 + + ; CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + ; CHECK: [[ldi:%.*]] = load <1 x float>, <1 x float>* [[adri]] + ; CHECK: [[adr4:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + ; CHECK: [[vali:%.*]] = extractelement <1 x float> [[ldi]], i64 0 + ; CHECK: store float [[vali]], float* [[adr4]] + %tmp8 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 %i + %tmp9 = load <1 x float>, <1 x float>* %tmp8, align 4 + %tmp10 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %tmp11 = extractelement <1 x float> %tmp9, i64 0 + store float %tmp11, float* %tmp10 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load <1 x float>, <1 x float>* [[adr2]] + ; CHECK: [[adr5:%.*]] = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + ; CHECK: [[val2:%.*]] = extractelement <1 x float> [[ld2]], i64 0 + ; CHECK: store float [[val2]], float* [[adr5]] + %tmp12 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %things, i32 0, i32 2 + %tmp13 = load <1 x float>, <1 x float>* %tmp12, align 4 + %tmp14 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %tmp15 = extractelement <1 x float> %tmp13, i64 0 + store float %tmp15, float* %tmp14 + + %tmp16 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 0 + %tmp17 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 0 + %load17 = load float, float* %tmp17 + %insert18 = insertelement <1 x float> undef, float %load17, i64 0 + store <1 x float> %insert18, <1 x float>* %tmp16 + 
+ %tmp18 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 1 + %tmp19 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 1 + %load15 = load float, float* %tmp19 + %insert16 = insertelement <1 x float> undef, float %load15, i64 0 + store <1 x float> %insert16, <1 x float>* %tmp18 + + %tmp20 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 2 + %tmp21 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 2 + %load13 = load float, float* %tmp21 + %insert14 = insertelement <1 x float> undef, float %load13, i64 0 + store <1 x float> %insert14, <1 x float>* %tmp20 + + %tmp22 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 3 + %tmp23 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 3 + %load11 = load float, float* %tmp23 + %insert12 = insertelement <1 x float> undef, float %load11, i64 0 + store <1 x float> %insert12, <1 x float>* %tmp22 + + %tmp24 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 4 + %tmp25 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 4 + %load9 = load float, float* %tmp25 + %insert10 = insertelement <1 x float> undef, float %load9, i64 0 + store <1 x float> %insert10, <1 x float>* %tmp24 + + %tmp26 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 5 + %tmp27 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 5 + %load7 = load float, float* %tmp27 + %insert8 = insertelement <1 x float> undef, float %load7, i64 0 + store <1 x float> %insert8, <1 x float>* %tmp26 + + %tmp28 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 6 + %tmp29 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 6 + %load5 = load float, float* %tmp29 + %insert6 = insertelement <1 x float> undef, float %load5, i64 0 + store <1 x float> %insert6, <1 x float>* %tmp28 + + %tmp30 = 
getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 7 + %tmp31 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 7 + %load3 = load float, float* %tmp31 + %insert4 = insertelement <1 x float> undef, float %load3, i64 0 + store <1 x float> %insert4, <1 x float>* %tmp30 + + %tmp32 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 8 + %tmp33 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 8 + %load1 = load float, float* %tmp33 + %insert2 = insertelement <1 x float> undef, float %load1, i64 0 + store <1 x float> %insert2, <1 x float>* %tmp32 + + %tmp34 = getelementptr inbounds [10 x <1 x float>], [10 x <1 x float>]* %agg.result, i32 0, i32 9 + %tmp35 = getelementptr [10 x float], [10 x float]* %res.0, i32 0, i32 9 + %load = load float, float* %tmp35 + %insert = insertelement <1 x float> undef, float %load, i64 0 + store <1 x float> %insert, <1 x float>* %tmp34 + + ret void +} + +; Function Attrs: nounwind +; CHECK-LABEL: define void @"\01?bittwiddlers +define void @"\01?bittwiddlers@@YAXY0L@$$CAI@Z"([11 x i32]* noalias %things) #0 { +bb: + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]], align 4 + ; CHECK: [[res0:%.*]] = xor i32 [[ld1]], -1 + ; CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + ; CHECK: store i32 [[res0]], i32* [[adr0]], align 4 + %tmp = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = xor i32 %tmp1, -1 + %tmp3 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + store i32 %tmp2, i32* %tmp3, align 4 + + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]], align 4 + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, 
i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + ; CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + ; CHECK: store i32 [[res1]], i32* [[adr1]], align 4 + %tmp4 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp6 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp7 = load i32, i32* %tmp6, align 4 + %tmp8 = or i32 %tmp5, %tmp7 + %tmp9 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + store i32 %tmp8, i32* %tmp9, align 4 + + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + ; CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]], align 4 + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + ; CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + ; CHECK: store i32 [[res2]], i32* [[adr2]], align 4 + %tmp10 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + %tmp11 = load i32, i32* %tmp10, align 4 + %tmp12 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = and i32 %tmp11, %tmp13 + %tmp15 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + store i32 %tmp14, i32* %tmp15, align 4 + + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]], align 4 + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + ; CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* 
%things, i32 0, i32 3 + ; CHECK: store i32 [[res3]], i32* [[adr3]], align 4 + %tmp16 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + %tmp17 = load i32, i32* %tmp16, align 4 + %tmp18 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = xor i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + store i32 %tmp20, i32* %tmp21, align 4 + + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[and4:%.*]] = and i32 [[ld6]], 31 + ; CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[and4]] + ; CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + ; CHECK: store i32 [[res4]], i32* [[adr4]], align 4 + %tmp22 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + %tmp23 = load i32, i32* %tmp22, align 4 + %tmp24 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp25 = load i32, i32* %tmp24, align 4 + %tmp26 = and i32 %tmp25, 31 + %tmp27 = shl i32 %tmp23, %tmp26 + %tmp28 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + store i32 %tmp27, i32* %tmp28, align 4 + + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[and5:%.*]] = and i32 [[ld7]], 31 + ; CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[and5]] + ; CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + ; CHECK: store i32 [[res5]], i32* 
[[adr5]], align 4 + %tmp29 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp30 = load i32, i32* %tmp29, align 4 + %tmp31 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp32 = load i32, i32* %tmp31, align 4 + %tmp33 = and i32 %tmp32, 31 + %tmp34 = lshr i32 %tmp30, %tmp33 + %tmp35 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + store i32 %tmp34, i32* %tmp35, align 4 + + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + ; CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]], align 4 + ; CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + ; CHECK: store i32 [[res6]], i32* [[adr6]], align 4 + %tmp36 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp37 = load i32, i32* %tmp36, align 4 + %tmp38 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + %tmp39 = load i32, i32* %tmp38, align 4 + %tmp40 = or i32 %tmp39, %tmp37 + store i32 %tmp40, i32* %tmp38, align 4 + + ; CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + ; CHECK: [[ld9:%.*]] = load i32, i32* [[adr9]], align 4 + ; CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + ; CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]], align 4 + ; CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]] + ; CHECK: store i32 [[res7]], i32* [[adr7]], align 4 + %tmp41 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + %tmp42 = load i32, i32* %tmp41, align 4 + %tmp43 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + %tmp44 = load i32, i32* %tmp43, align 4 + %tmp45 = and i32 %tmp44, %tmp42 + store i32 %tmp45, i32* %tmp43, align 4 + + ; CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 
0, i32 10 + ; CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]], align 4 + ; CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + ; CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]], align 4 + ; CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]] + ; CHECK: store i32 [[res8]], i32* [[adr8]], align 4 + %tmp46 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + %tmp47 = load i32, i32* %tmp46, align 4 + %tmp48 = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + %tmp49 = load i32, i32* %tmp48, align 4 + %tmp50 = xor i32 %tmp49, %tmp47 + store i32 %tmp50, i32* %tmp48, align 4 + + ret void +} + +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #2 +declare %dx.types.Handle @"dx.op.createHandleForLib.class.RWStructuredBuffer >"(i32, %"class.RWStructuredBuffer >") #2 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!3} +!3 = !{i32 1, i32 9} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl new file mode 100644 index 0000000000..66382af2d5 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators-vec1.hlsl @@ -0,0 +1,425 @@ +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double1 -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t1 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 
2018 -T lib_6_9 -DTYPE=int16_t1 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Mainly a source for the vec1 scalarizer IR test. +// Serves to verify some codegen as well. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Need to capture once for the full vector type, again for the element type. +// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:<[0-9]* x [a-z0-9_]*>]] } +// CHECK-DAG: %"class.RWStructuredBuffer<{{.*}}>" = type { <{{[0-9]*}} x [[ELTY:[a-z0-9_]*]]> } +RWStructuredBuffer buf; + +export void assignments(inout TYPE things[10], TYPE scales[10]); +export TYPE arithmetic(inout TYPE things[11])[11]; +export bool logic(bool truth[10], TYPE consequences[10])[10]; +export TYPE index(TYPE things[10], int i, TYPE val)[10]; + +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout TYPE things[10]) { + + // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1) + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: store [[TYPE]] [[res0]], [[TYPE]]* [[adr0]] + things[0] = buf.Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] [[TYPE]] [[vec1]], [[vec5]] + // CHECK: store [[TYPE]] [[res1]], [[TYPE]]* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[adr2:%.*]] = 
getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[SUB:f?sub( fast)?]] [[TYPE]] [[vec2]], [[vec6]] + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] [[TYPE]] [[vec3]], [[vec7]] + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] [[TYPE]] [[vec4]], [[vec8]] + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 9 + // NODBL: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] [[TYPE]] [[vec5]], [[vec9]] + // NODBL: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export TYPE arithmetic(inout TYPE things[11])[11] { + TYPE res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[res1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res0:%.*]] = [[SUB]] [[TYPE]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 2 + // CHECK: store [[TYPE]] [[res2]], [[TYPE]]* [[adr2]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 3 + // CHECK: store [[TYPE]] [[res3]], [[TYPE]]* [[adr3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[adr4:%.*]] = getelementptr 
inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 4 + // CHECK: store [[TYPE]] [[res4]], [[TYPE]]* [[adr4]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 5 + // CHECK: store [[TYPE]] [[res5]], [[TYPE]]* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] [[TYPE]] [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 6 + // NODBL: store [[TYPE]] [[res6]], [[TYPE]]* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load [[TYPE]], [[TYPE]]* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD]] [[TYPE]] [[vec7]], <[[ELTY]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]]> + // CHECK: store [[TYPE]] [[res7]], [[TYPE]]* [[adr7]] + // This is a post op, so the original value goes into res[]. 
+ // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 7 + // CHECK: store [[TYPE]] [[vec7]], [[TYPE]]* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load [[TYPE]], [[TYPE]]* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] [[TYPE]] [[vec8]] + // CHECK: store [[TYPE]] [[res8]], [[TYPE]]* [[adr8]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 8 + // CHECK: store [[TYPE]] [[vec8]], [[TYPE]]* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 9 + // CHECK: [[vec9:%.*]] = load [[TYPE]], [[TYPE]]* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] [[TYPE]] [[vec9]] + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 9 + // CHECK: store [[TYPE]] [[res9]], [[TYPE]]* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %things, i32 0, i32 10 + // CHECK: [[vec10:%.*]] = load [[TYPE]], [[TYPE]]* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] [[TYPE]] [[vec10]] + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x [[TYPE]]], [11 x [[TYPE]]]* %res, i32 0, i32 10 + // CHECK: store [[TYPE]] [[res10]], [[TYPE]]* [[adr10]] + res[10] = --things[10]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [11 x [[TYPE]]]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [11 x [[TYPE]]]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissible in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export bool logic(bool truth[10], TYPE consequences[10])[10] { + bool res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load i32, i32* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne i32 [[vec0]], 0 + // CHECK: [[bres0:%.*]] = xor i1 [[bvec0]], true + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 0 + // CHECK: [[res0:%.*]] = zext i1 [[bres0]] to i32 + // CHECK: store i32 [[res0]], i32* [[adr0]] + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne i32 [[vec1]], 0 + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[bres1:%.*]] = or i1 [[bvec1]], [[bvec2]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 1 + // CHECK: [[res1:%.*]] = zext i1 [[bres1]] to i32 + // CHECK: store i32 [[res1]], i32* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne i32 [[vec2]], 0 + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[bres2:%.*]] = and i1 [[bvec2]], [[bvec3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext i1 [[bres2]] to i32 + // CHECK: store i32 [[res2]], i32* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: 
[[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne i32 [[vec3]], 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne i32 [[vec4]], 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne i32 [[vec5]], 0 + // CHECK: [[bres3:%.*]] = select i1 [[bvec3]], i1 [[bvec4]], i1 [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 3 + // CHECK: [[res3:%.*]] = zext i1 [[bres3]] to i32 + // CHECK: store i32 [[res3]], i32* [[adr3]] + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[cmp4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq [[TYPE]] [[vec0]], [[vec1]] + // CHECK: [[bres4:%.*]] = extractelement <1 x i1> [[cmp4]], i64 0 + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext i1 [[bres4]] to i32 + // CHECK: store i32 [[res4]], i32* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load [[TYPE]], [[TYPE]]* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: 
[[cmp5:%.*]] = [[CMP]] {{u?}}ne [[TYPE]] [[vec1]], [[vec2]] + // CHECK: [[bres5:%.*]] = extractelement <1 x i1> [[cmp5]], i64 0 + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext i1 [[bres5]] to i32 + // CHECK: store i32 [[res5]], i32* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[cmp6:%.*]] = [[CMP]] {{[osu]?}}lt [[TYPE]] [[vec2]], [[vec3]] + // CHECK: [[bres6:%.*]] = extractelement <1 x i1> [[cmp6]], i64 0 + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext i1 [[bres6]] to i32 + // CHECK: store i32 [[res6]], i32* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load [[TYPE]], [[TYPE]]* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] + // CHECK: [[cmp7:%.*]] = [[CMP]] {{[osu]?}}gt [[TYPE]] [[vec3]], [[vec4]] + // CHECK: [[bres7:%.*]] = extractelement <1 x i1> [[cmp7]], i64 0 + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext i1 [[bres7]] to i32 + // CHECK: store i32 [[res7]], i32* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load [[TYPE]], [[TYPE]]* [[adr4]] 
+ // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[cmp8:%.*]] = [[CMP]] {{[osu]?}}le [[TYPE]] [[vec4]], [[vec5]] + // CHECK: [[bres8:%.*]] = extractelement <1 x i1> [[cmp8]], i64 0 + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 8 + // CHECK: [[res8:%.*]] = zext i1 [[bres8]] to i32 + // CHECK: store i32 [[res8]], i32* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load [[TYPE]], [[TYPE]]* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load [[TYPE]], [[TYPE]]* [[adr6]] + // CHECK: [[cmp9:%.*]] = [[CMP]] {{[osu]?}}ge [[TYPE]] [[vec5]], [[vec6]] + // CHECK: [[bres9:%.*]] = extractelement <1 x i1> [[cmp9]], i64 0 + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext i1 [[bres9]] to i32 + // CHECK: store i32 [[res9]], i32* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. 
+ // CHECK: [[retptr:%.*]] = bitcast [10 x i32]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x i32]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export TYPE index(TYPE things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x [[TYPE]]] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + TYPE res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 0 + // CHECK: store [[TYPE]] zeroinitializer, [[TYPE]]* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 [[i]] + // CHECK: store [[TYPE]] <[[ELTY]] {{(1|1\.0*e\+0*|0xH3C00).*}}>, [[TYPE]]* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 2 + // CHECK: store [[TYPE]] <[[ELTY]] {{(2|2\.0*e\+0*|0xH4000).*}}>, [[TYPE]]* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load [[TYPE]], [[TYPE]]* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 3 + // CHECK: store [[TYPE]] [[thg0]], [[TYPE]]* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %things, i32 0, i32 [[i]] + // CHECK: [[thgi:%.*]] = load [[TYPE]], [[TYPE]]* [[adri]] + // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 4 + // CHECK: store [[TYPE]] [[thgi]], [[TYPE]]* [[res4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x 
[[TYPE]]]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load [[TYPE]], [[TYPE]]* [[adr2]] + // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* [[res]], i32 0, i32 5 + // CHECK: store [[TYPE]] [[thg2]], [[TYPE]]* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +// Test bit twiddling operators. +// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout uint things[11]) { + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load i32, i32* [[adr1]] + // CHECK: [[res1:%.*]] = xor i32 [[ld1]], -1 + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 0 + // CHECK: store i32 [[res1]], i32* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load i32, i32* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[res1:%.*]] = or i32 [[ld2]], [[ld3]] + // CHECK: store i32 [[res1]], i32* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load i32, i32* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[res2:%.*]] = and i32 [[ld3]], [[ld4]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 2 + // CHECK: store i32 [[res2]], i32* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load i32, i32* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 
0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[res3:%.*]] = xor i32 [[ld4]], [[ld5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 3 + // CHECK: store i32 [[res3]], i32* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load i32, i32* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[shv6:%.*]] = and i32 [[ld6]], 31 + // CHECK: [[res4:%.*]] = shl i32 [[ld5]], [[shv6]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 4 + // CHECK: store i32 [[res4]], i32* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]] + // CHECK: [[shv7:%.*]] = and i32 [[ld7]], 31 + // CHECK: [[res5:%.*]] = lshr i32 [[ld6]], [[shv7]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 5 + // CHECK: store i32 [[res5]], i32* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load i32, i32* [[adr6]] + // CHECK: [[res6:%.*]] = or i32 [[ld6]], [[ld8]] + // CHECK: store i32 [[res6]], i32* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load i32, i32* 
[[adr9]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load i32, i32* [[adr7]] + // CHECK: [[res7:%.*]] = and i32 [[ld7]], [[ld9]] + // CHECK: store i32 [[res7]], i32* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load i32, i32* [[adr10]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x i32], [11 x i32]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load i32, i32* [[adr8]] + // CHECK: [[res8:%.*]] = xor i32 [[ld8]], [[ld10]] + // CHECK: store i32 [[res8]], i32* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl new file mode 100644 index 0000000000..2c2ef01b8a --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/passes/longvec-operators.hlsl @@ -0,0 +1,420 @@ +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=16 -DDBL %s | FileCheck %s --check-prefixes=CHECK +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=34 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -fcgl -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=129 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Mainly a source for the longvec scalarizer IR test. +// Serves to verify some codegen as well. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
+// CHECK: %"class.RWStructuredBuffer<{{.*}}>" = type { [[TYPE:[a-z0-9]*]] } +// CHECK: external global {{\[}}[[NUM:[0-9]*]] x %"class.RWStructuredBuffer +RWStructuredBuffer buf[NUM]; + + +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10]) { + + // CHECK: [[res0:%.*]] = call [[TYPE]] @"dx.hl.op.ro.[[TYPE]] (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle {{%.*}}, i32 1) + // CHECK: [[vec0:%.*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[res0]], i32 0 + // CHECK: [[res0:%.*]] = shufflevector <[[NUM]] x [[TYPE]]> [[vec0]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[adr0]] + things[0] = buf[0].Load(1); + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[res1:%.*]] = [[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[adr1]] + things[1] += things[5]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: 
[[res2:%.*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + things[2] -= things[6]; + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res3:%.*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + things[3] *= things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res4:%.*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + things[4] /= things[8]; + +#ifndef DBL + // NODBL: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // NODBL: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // NODBL: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // NODBL: [[res5:%.*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + // NODBL: store 
<[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + things[5] %= things[9]; +#endif +} + +// Test arithmetic operators. +// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[11])[11] { + vector res[11]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[res1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[res0:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]> + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[res2:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[adr2]] + res[2] = things[1] + things[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[res3:%.*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res3]], <[[NUM]] x [[TYPE]]>* [[adr3]] + res[3] = things[2] - things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[res4:%.*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[adr4]] + res[4] = things[3] * things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[res5:%.*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[adr5]] + res[5] = things[4] / things[5]; + +#ifndef DBL + // NODBL: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // NODBL: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // NODBL: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x 
[[TYPE]]>* [[adr6]] + // NODBL: [[res6:%.*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // NODBL: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 6 + // NODBL: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[adr6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr7]] + // CHECK: [[res7:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]], <[[TYPE]] [[POS1:(1|1\.0*e\+0*|0xH3C00)]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + // This is a post op, so the original value goes into res[]. + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[adr7]] + res[7] = things[7]++; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr8]] + // CHECK: [[res8:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + // This is a post op, so the original value goes into res[]. 
+ // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 8 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[adr8]] + res[8] = things[8]--; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[res9:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 9 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[adr9]] + res[9] = ++things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[res10:%.*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %res, i32 0, i32 10 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[adr10]] + res[10] = --things[10]; + + // Memcpy res into return value. + // CHECK: [[retptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [11 x <[[NUM]] x [[TYPE]]>]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +// Test logic operators. 
+// Only permissable in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector logic(vector truth[10], vector consequences[10])[10] { + vector res[10]; + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr0]] + // CHECK: [[bvec0:%.*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[bres0:%.*]] = icmp eq <[[NUM]] x i1> [[bvec0]], zeroinitializer + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 0 + // CHECK: [[res0:%.*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[adr0]] + res[0] = !truth[0]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]] + // CHECK: [[bvec1:%.*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[val1:%.*]] = icmp ne <[[NUM]] x i1> [[bvec1]], zeroinitializer + // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer + // CHECK: [[bres1:%.*]] = or <[[NUM]] x i1> [[val1]], [[val2]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 1 + // CHECK: [[res1:%.*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]] + res[1] = truth[1] || truth[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: 
[[vec2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[bvec2:%.*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[val2:%.*]] = icmp ne <[[NUM]] x i1> [[bvec2]], zeroinitializer + // CHECK: [[val3:%.*]] = icmp ne <[[NUM]] x i1> [[bvec3]], zeroinitializer + // CHECK: [[bres2:%.*]] = and <[[NUM]] x i1> [[val2]], [[val3]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 2 + // CHECK: [[res2:%.*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]] + res[2] = truth[2] && truth[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[bvec3:%.*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[bvec4:%.*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[bvec5:%.*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres3:%.*]] = select <[[NUM]] x i1> [[bvec3]], <[[NUM]] x i1> [[bvec4]], <[[NUM]] x i1> [[bvec5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 3 + // CHECK: 
[[res3:%.*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]] + res[3] = truth[3] ? truth[4] : truth[5]; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[bres4:%.*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 4 + // CHECK: [[res4:%.*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]] + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[adr1:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr1]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[bres5:%.*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 5 + // CHECK: [[res5:%.*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]] + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, 
i32 0, i32 2 + // CHECK: [[vec2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[bres6:%.*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 6 + // CHECK: [[res6:%.*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]] + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[bres7:%.*]] = [[CMP]] {{[osu]]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 7 + // CHECK: [[res7:%.*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]] + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* 
[[adr5]] + // CHECK: [[bres8:%.*]] = [[CMP]] {{[osu]]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 8 + // CHECK: [[res8:%.*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]] + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr6]] + // CHECK: [[bres9:%.*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[adr9:%.*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %res, i32 0, i32 9 + // CHECK: [[res9:%.*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[adr9]] + res[9] = consequences[5] >= consequences[6]; + + // Memcpy res into return value. 
+ // CHECK: [[retptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %agg.result to i8* + // CHECK: [[resptr:%.*]] = bitcast [10 x <[[NUM]] x i32>]* %res to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[retptr]], i8* [[resptr]] + // CHECK: ret void + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i)[10] { + // CHECK: [[res:%.*]] = alloca [10 x <[[NUM]] x [[TYPE]]>] + // CHECK: store i32 %i, i32* [[iadd:%.[0-9]*]] + vector res[10]; + + // CHECK: [[res0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]] + res[0] = 0; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 [[i]] + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|1\.0*e\+0*|0xH3C00).*}}, <[[NUM]] x [[TYPE]]>* [[adri]] + res[i] = 1; + + // CHECK: [[res2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|2\.0*e\+0*|0xH4000).*}}, <[[NUM]] x [[TYPE]]>* [[res2]] + res[Ix] = 2; + + // CHECK: [[adr0:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr0]] + // CHECK: [[res3:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]] + res[3] = things[0]; + + // CHECK: [[i:%.*]] = load i32, i32* [[iadd]] + // CHECK: [[adri:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 [[i]] + // CHECK: 
[[thgi:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adri]] + // CHECK: [[res4:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]] + res[4] = things[i]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%.*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[adr2]] + // CHECK: [[res5:%.*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} + +// Test bit twiddling operators. +// INT-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector things[11]) { + // CHECK: [[adr1:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[ld1:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr1]] + // CHECK: [[res1:%.*]] = xor <[[NUM]] x i32> [[ld1]], ], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr0]] + things[0] = ~things[1]; + + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[ld2:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr2]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[res1:%.*]] = or <[[NUM]] x i32> [[ld2]], [[ld3]] + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[adr1]] + things[1] = things[2] | things[3]; + + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, 
i32 3 + // CHECK: [[ld3:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr3]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[res2:%.*]] = and <[[NUM]] x i32> [[ld3]], [[ld4]] + // CHECK: [[adr2:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[adr2]] + things[2] = things[3] & things[4]; + + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[ld4:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr4]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[res3:%.*]] = xor <[[NUM]] x i32> [[ld4]], [[ld5]] + // CHECK: [[adr3:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[adr3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[ld5:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr5]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]] + // CHECK: [[shv6:%.*]] = and <[[NUM]] x i32> [[ld6]], [[ld5]], [[shv6]] + // CHECK: [[adr4:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[adr4]] + things[4] = things[5] << things[6]; + + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x 
<[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]] + // CHECK: [[shv7:%.*]] = and <[[NUM]] x i32> [[ld7]], [[ld6]], [[shv7]] + // CHECK: [[adr5:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[adr5]] + things[5] = things[6] >> things[7]; + + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]] + // CHECK: [[adr6:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[ld6:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr6]] + // CHECK: [[res6:%.*]] = or <[[NUM]] x i32> [[ld6]], [[ld8]] + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[adr6]] + things[6] |= things[8]; + + // CHECK: [[adr9:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 9 + // CHECK: [[ld9:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr9]] + // CHECK: [[adr7:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 7 + // CHECK: [[ld7:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr7]] + // CHECK: [[res7:%.*]] = and <[[NUM]] x i32> [[ld7]], [[ld9]] + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[adr7]] + things[7] &= things[9]; + + // CHECK: [[adr10:%.*]] = getelementptr inbounds [11 x <[[NUM]] x i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 10 + // CHECK: [[ld10:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr10]] + // CHECK: [[adr8:%.*]] = getelementptr inbounds [11 x <[[NUM]] x 
i32>], [11 x <[[NUM]] x i32>]* %things, i32 0, i32 8 + // CHECK: [[ld8:%.*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[adr8]] + // CHECK: [[res8:%.*]] = xor <[[NUM]] x i32> [[ld8]], [[ld10]] + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[adr8]] + things[8] ^= things[10]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv index 3b0c060a0d..9d915a84f2 100644 --- a/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv +++ b/tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv @@ -96,7 +96,6 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input, // CHECK-NEXT: OpDecorate %in_var_TANVCORNER Patch // CHECK-NEXT: OpDecorate %in_var_TANWEIGHTS Patch // CHECK-NEXT: OpDecorate %gl_TessCoord BuiltIn TessCoord -// CHECK-NEXT: OpDecorate %gl_TessCoord Patch // CHECK-NEXT: OpDecorate %gl_Position BuiltIn Position // CHECK-NEXT: OpDecorate %in_var_BEZIERPOS Location 0 // CHECK-NEXT: OpDecorate %in_var_TANGENT Location 1 diff --git a/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl new file mode 100644 index 0000000000..19a37d071c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/cast.to.void.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc dxc -T cs_6_6 -E Main -spirv %s -fcgl | FileCheck %s + + +// Make sure no code is generated for the cast to void. 
+ +// CHECK: %src_Main = OpFunction %void None +// CHECK-NEXT: OpLabel +// CHECK-NEXT: %x = OpVariable +// CHECK-NEXT: OpStore %x %false +// CHECK-NEXT: OpReturn +// CHECK-NEXT: OpFunctionEnd + +[numthreads(1, 1, 1)] +void Main() +{ + bool x = false; + (void)x; +} diff --git a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl index 3ec0ad447e..8d0195d672 100644 --- a/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl +++ b/tools/clang/test/CodeGenSPIRV/cs.groupshared.function-param.out.hlsl @@ -28,14 +28,10 @@ groupshared S D; [numthreads(1,1,1)] void main() { // CHECK: %E = OpVariable %_ptr_Function_int Function -// CHECK-NEXT: [[TempVar:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function - int E; // CHECK: [[A:%[0-9]+]] = OpAccessChain %_ptr_Uniform_int %A %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[A]] -// CHECK-NEXT: OpStore [[TempVar]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[TempVar]] %B %C %D %E +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %foo [[A]] %B %C %D %E foo(A[0], B, C, D, E); A[0] = A[0] | B | C | D.a | E; } diff --git a/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl new file mode 100644 index 0000000000..9e2246e6a5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/ddx.compute.khr.hlsl @@ -0,0 +1,29 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s + +// CHECK: OpCapability ComputeDerivativeGroupQuadsKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupQuadsKHR + + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(2,2,1)] +void main(uint3 id : SV_GroupThreadID) +{ + // CHECK: OpDPdx %float %float_0_5 + o[0] = ddx(0.5); + // CHECK: 
OpDPdxCoarse %float %float_0_5 + o[1] = ddx_coarse(0.5); + // CHECK: OpDPdy %float %float_0_5 + o[2] = ddy(0.5); + // CHECK: OpDPdyCoarse %float %float_0_5 + o[3] = ddy_coarse(0.5); + // CHECK: OpDPdxFine %float %float_0_5 + o[4] = ddx_fine(0.5); + // CHECK: OpDPdyFine %float %float_0_5 + o[5] = ddy_fine(0.5); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl index a8578f7377..5815981057 100644 --- a/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl +++ b/tools/clang/test/CodeGenSPIRV/decoration.coherent.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=VULKAN // When the GLSL450 memory model is used, there should be no memory operands on the loads and stores. // When the Vulkan memory model is used, there should be no decorations. There should be memory operands on the loads and stores instead. 
diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl index 70bf50abc6..dba7cd00ce 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-compute.hlsl @@ -7,19 +7,19 @@ float4 foo(inout float f0, inout int f1) return 0; } -// CHECK-DAG: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function -// CHECK-DAG: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function +// CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Uniform_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int {{%[a-zA-Z0-9_]+}} %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %v4float %foo [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] struct Stru { int x; diff --git a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl index 6acd104aa3..5977fc454a 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.fixfuncall-linkage.hlsl @@ -6,19 +6,19 @@ RWStructuredBuffer< float4 > output : register(u1); // CHECK: OpDecorate %main LinkageAttributes "main" Export // CHECK: %main = OpFunction %int None -// CHECK: 
[[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s39:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_int Function +// CHECK: [[s36:%[a-zA-Z0-9_]+]] = OpVariable %_ptr_Function_float Function // CHECK: [[s33:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_StorageBuffer_float {{%[a-zA-Z0-9_]+}} %int_0 +// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s37:%[a-zA-Z0-9_]+]] = OpLoad %float [[s33]] // CHECK: OpStore [[s36]] [[s37]] -// CHECK: [[s34:%[a-zA-Z0-9_]+]] = OpAccessChain %_ptr_Function_int %stru %int_1 // CHECK: [[s40:%[a-zA-Z0-9_]+]] = OpLoad %int [[s34]] // CHECK: OpStore [[s39]] [[s40]] // CHECK: {{%[a-zA-Z0-9_]+}} = OpFunctionCall %void %func [[s36]] [[s39]] -// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] -// CHECK: OpStore [[s33]] [[s38]] // CHECK: [[s41:%[a-zA-Z0-9_]+]] = OpLoad %int [[s39]] // CHECK: OpStore [[s34]] [[s41]] +// CHECK: [[s38:%[a-zA-Z0-9_]+]] = OpLoad %float [[s36]] +// CHECK: OpStore [[s33]] [[s38]] [noinline] void func(inout float f0, inout int f1) { diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl index 4d75d27fa8..d0e771e834 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl @@ -11,13 +11,10 @@ void main(float input : INPUT) { // CHECK: %param_var_a = OpVariable %_ptr_Function_float Function // CHECK: [[val:%[0-9]+]] = OpLoad %float %input -// CHECK: OpStore %param_var_a [[val]] +// CHECK: OpStore %param_var_a [[val]] // CHECK: [[p0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float [[p0]] -// CHECK-NEXT: OpStore [[temp0:%[a-zA-Z0-9_]+]] [[ld]] // CHECK: [[p1:%[0-9]+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_1 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %float %32 -// CHECK-NEXT: OpStore 
[[temp1:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK: OpFunctionCall %void %foo %param_var_a [[temp0]] [[temp1]] + +// CHECK: OpFunctionCall %void %foo %param_var_a [[p0]] [[p1]] foo(input, Data[0], Data[1]); } diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl index 5641923aaa..bda2183057 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl @@ -18,9 +18,7 @@ float4 main() : C { float4 val; // CHECK: [[z_ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_float %val %int_2 -// CHECK: [[ld:%[0-9]+]] = OpLoad %float [[z_ptr]] -// CHECK: OpStore %param_var_w [[ld]] -// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z %param_var_w +// CHECK: {{%[0-9]+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z [[z_ptr]] // CHECK-NEXT: [[y:%[0-9]+]] = OpLoad %v3float %param_var_y // CHECK-NEXT: [[old:%[0-9]+]] = OpLoad %v4float %val // Write to val.zwx: @@ -39,10 +37,6 @@ float4 main() : C { // CHECK-NEXT: [[old_0:%[0-9]+]] = OpLoad %v4float %val // CHECK-NEXT: [[new_0:%[0-9]+]] = OpVectorShuffle %v4float [[old_0]] [[z]] 4 5 2 3 // CHECK-NEXT: OpStore %val [[new_0]] - // Write to val.z: -// CHECK-NEXT: [[new:%[0-9]+]] = OpLoad %float %param_var_w -// CHECK-NEXT: OpStore [[z_ptr]] [[new]] - bar(val, val.zwx, val.xy, val.z); return MyRWBuffer[0]; diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl index 3f890099f5..a4ad925f77 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl @@ -62,11 +62,7 @@ void main() { fn.incr(); // CHECK: [[rwsb_0:%[0-9]+]] = OpAccessChain %_ptr_Uniform_R %rwsb %int_0 %uint_0 -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %R [[rwsb_0]] -// CHECK-NEXT: [[ex:%[0-9]+]] = OpCompositeExtract %int [[ld]] 0 -// CHECK-NEXT: [[v:%[0-9]+]] = 
OpCompositeConstruct %R_0 [[ex]] -// CHECK-NEXT: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[v]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr [[rwsb_0]] decr(rwsb[0]); // CHECK: OpFunctionCall %void %decr2 %gs @@ -91,29 +87,21 @@ void main() { fnarr[0].incr(); // CHECK: [[gsarr_0:%[0-9]+]] = OpAccessChain %_ptr_Workgroup_S %gsarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[gsarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[gsarr_0]] decr2(gsarr[0]); // CHECK: [[starr_0:%[0-9]+]] = OpAccessChain %_ptr_Private_S %starr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[starr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[starr_0]] decr2(starr[0]); // CHECK: [[fnarr_0:%[0-9]+]] = OpAccessChain %_ptr_Function_S %fnarr %int_0 -// CHECK: [[ld:%[0-9]+]] = OpLoad %S [[fnarr_0]] -// CHECK: OpStore [[TempVar:%[a-zA-Z0-9_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[TempVar]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %decr2 [[fnarr_0]] decr2(fnarr[0]); // CHECK: [[arr:%[0-9]+]] = OpAccessChain %_ptr_Function_int %arr %int_0 // CHECK-NEXT: [[arr_0:%[0-9]+]] = OpLoad %int [[arr]] // CHECK-NEXT: [[arr_1:%[0-9]+]] = OpIAdd %int [[arr_0]] %int_1 -// CHECK-NEXT: OpStore [[arr]] [[arr_1]] -// CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad %int [[arr]] -// CHECK-NEXT: OpStore [[TempVar:%[0-9a-zA-Z_]+]] [[ld]] -// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[TempVar]] +// CHECK-NEXT: OpStore [[arr]] [[arr_1]] +// CHECK-NEXT: {{%[0-9]+}} = OpFunctionCall %void %int_decr [[arr]] int_decr(++arr[0]); } diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl 
b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl index e9a1813f31..a0b2ab7207 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.ps.hlsl @@ -1,5 +1,5 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=GLSL450 -// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN +// RUN: %dxc -T ps_6_0 -E main -fcgl -fspv-use-vulkan-memory-model -fspv-target-env=vulkan1.1 %s -spirv | FileCheck %s -check-prefix=CHECK -check-prefix=VULKAN RWTexture1D g_tTex1di1; RWTexture1D g_tTex1du1; diff --git a/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl new file mode 100644 index 0000000000..0ab965aded --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/lib.fn.export.with.entrypoint.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_6 -E main -fspv-target-env=universal1.5 -fcgl %s -spirv | FileCheck %s + +// CHECK: OpEntryPoint MissKHR %miss "miss" %payload +// CHECK: OpDecorate %func LinkageAttributes "func" Export + + +struct RayPayload +{ + uint a; +}; + +export void func() +{ +} + +[shader("miss")] +void miss(inout RayPayload payload) +{ +} diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl new file mode 100644 index 0000000000..05d9d8fb1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.nv.triangle.indices.out.hlsl @@ -0,0 +1,65 @@ +// RUN: %dxc -T ms_6_5 -E outie -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ms_6_5 -E innie -fcgl %s -spirv | FileCheck %s + +// CHECK-DAG: [[v4_n05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_n0_5 %float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_05_05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0_5 
%float_0_5 %float_0 %float_1 +// CHECK-DAG: [[v4_0_n05_0_1:%[0-9]+]] = OpConstantComposite %v4float %float_0 %float_n0_5 %float_0 %float_1 +// CHECK-DAG: [[v3_1_0_0:%[0-9]+]] = OpConstantComposite %v3float %float_1 %float_0 %float_0 +// CHECK-DAG: [[v3_0_1_0:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_1 %float_0 +// CHECK-DAG: [[v3_0_0_1:%[0-9]+]] = OpConstantComposite %v3float %float_0 %float_0 %float_1 +// CHECK-DAG: [[u3_0_1_2:%[0-9]+]] = OpConstantComposite %v3uint %uint_0 %uint_1 %uint_2 + +// CHECK-DAG: OpDecorate [[indices:%[0-9]+]] BuiltIn PrimitiveIndicesNV + +struct MeshOutput { + float4 position : SV_Position; + float3 color : COLOR0; +}; + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void innie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + SetMeshOutputCounts(3, 2); + + triangles[0] = uint3(0, 1, 2); +// CHECK: [[off:%[0-9]+]] = OpIMul %uint %uint_0 %uint_3 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[off]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 0 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_1 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 1 +// CHECK: OpStore [[ptr]] [[tmp]] +// CHECK: [[idx:%[0-9]+]] = OpIAdd %uint [[off]] %uint_2 +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_uint [[indices]] [[idx]] +// CHECK: [[tmp:%[0-9]+]] = OpCompositeExtract %uint [[u3_0_1_2]] 2 +// CHECK: OpStore [[ptr]] [[tmp]] + + verts[0].position = float4(-0.5, 0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_0 +// CHECK: OpStore [[ptr]] [[v4_n05_05_0_1]] + verts[0].color = float3(1.0, 0.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_0 +// CHECK: OpStore [[ptr]] [[v3_1_0_0]] + + verts[1].position = float4(0.5, 0.5, 0.0, 
1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_1 +// CHECK: OpStore [[ptr]] [[v4_05_05_0_1]] + verts[1].color = float3(0.0, 1.0, 0.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_1 +// CHECK: OpStore [[ptr]] [[v3_0_1_0]] + + verts[2].position = float4(0.0, -0.5, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v4float %gl_Position %int_2 +// CHECK: OpStore [[ptr]] [[v4_0_n05_0_1]] + verts[2].color = float3(0.0, 0.0, 1.0); +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Output_v3float %out_var_COLOR0 %int_2 +// CHECK: OpStore [[ptr]] [[v3_0_0_1]] + +} + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void outie(out indices uint3 triangles[1], out vertices MeshOutput verts[3]) { + innie(triangles, verts); +} diff --git a/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl new file mode 100644 index 0000000000..414d8a638c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/op.struct.access.bitfield.sized.rvalue.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T cs_6_2 -E main -spirv -fcgl -enable-16bit-types %s | FileCheck %s + +struct S1 +{ + uint16_t a : 8; +}; + +S1 foo() +{ + return (S1)0; +} + +[numthreads(1, 1, 1)] +void main() { + uint16_t test = foo().a; +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_ushort %temp_var_S1 %int_0 +// CHECK: [[raw:%[0-9]+]] = OpLoad %ushort [[ptr]] +// CHECK: [[tmp:%[0-9]+]] = OpShiftLeftLogical %ushort [[raw]] %uint_8 +// CHECK: [[out:%[0-9]+]] = OpShiftRightLogical %ushort [[tmp]] %uint_8 +// CHECK-NOT: OpLoad %ushort [[out]] +// CHECK: OpStore %test [[out]] +} diff --git a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl index 5e4049f8c3..391e09a428 100644 --- a/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl +++ 
b/tools/clang/test/CodeGenSPIRV/semantic.domain-location.ds.hlsl @@ -4,7 +4,6 @@ // CHECK-SAME: %gl_TessCoord // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord -// CHECK: OpDecorate %gl_TessCoord Patch // CHECK: %gl_TessCoord = OpVariable %_ptr_Input_v3float Input diff --git a/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl new file mode 100644 index 0000000000..fb9f6e0d76 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/sm6.quad-any-all.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,QUAD +// RUN: %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.1 -fspv-extension=SPV_KHR_16bit_storage -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,NOQUAD +// RUN: not %dxc -T cs_6_0 -E main -fspv-target-env=vulkan1.0 -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefixes=ERROR + +// CHECK: ; Version: 1.3 + +// QUAD: OpCapability QuadControlKHR +// QUAD: OpExtension "SPV_KHR_quad_control" + +RWStructuredBuffer values; + +[numthreads(32, 1, 1)] +void main(uint3 id: SV_DispatchThreadID) { + uint outIdx = (id.y * 8) + id.x; + +// CHECK: [[val1:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAnyKHR %bool [[val1]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalOr %bool [[val1]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalOr %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val1]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalOr %bool [[or1]] [[inv2]] + +// ERROR: 27:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].x = QuadAny(outIdx % 4 == 0) ? 
1.0 : 2.0; + +// CHECK: [[val2:%[0-9]+]] = OpIEqual %bool {{%[0-9]+}} +// QUAD-NEXT: {{%[0-9]+}} = OpGroupNonUniformQuadAllKHR %bool [[val2]] + +// NOQUAD-NEXT: [[inv0:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_0 +// NOQUAD-NEXT: [[or0:%[0-9]+]] = OpLogicalAnd %bool [[val2]] [[inv0]] +// NOQUAD-NEXT: [[inv1:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_1 +// NOQUAD-NEXT: [[or1:%[0-9]+]] = OpLogicalAnd %bool [[or0]] [[inv1]] +// NOQUAD-NEXT: [[inv2:%[0-9]+]] = OpGroupNonUniformQuadSwap %bool %uint_3 [[val2]] %uint_2 +// NOQUAD-NEXT: [[or2:%[0-9]+]] = OpLogicalAnd %bool [[or1]] [[inv2]] + +// ERROR: 40:24: error: Vulkan 1.1 is required for Wave Operation but not permitted to use + values[outIdx].y = QuadAll(outIdx % 2 == 0) ? 3.0 : 4.0; +} diff --git a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl index a8fe81e021..6f073aeb46 100644 --- a/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl +++ b/tools/clang/test/CodeGenSPIRV/spirv.interface.ds.hlsl @@ -85,7 +85,6 @@ struct DsOut { // CHECK: OpDecorateString %gl_PointSize UserSemantic "PSIZE" // CHECK: OpDecorate %gl_TessCoord BuiltIn TessCoord // CHECK: OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" -// CHECK: OpDecorate %gl_TessCoord Patch // CHECK: OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter // CHECK: OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" // CHECK: OpDecorate %gl_TessLevelOuter Patch diff --git a/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl new file mode 100644 index 0000000000..23f52ad4b5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/texture.calculate.lod.compute.linear.khr.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives -fcgl %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: 
%dxc -T cs_6_6 -E main -fspv-extension=SPV_KHR_compute_shader_derivatives %s -spirv 2>&1 | FileCheck %s --check-prefix=CHECK + +// CHECK: OpCapability ComputeDerivativeGroupLinearKHR +// CHECK: OpExtension "SPV_KHR_compute_shader_derivatives" +// CHECK: OpExecutionMode %main DerivativeGroupLinearKHR + +SamplerState ss : register(s2); +SamplerComparisonState scs; + +RWStructuredBuffer o; +Texture1D t1; + +[numthreads(16,1,1)] +void main(uint3 id : SV_GroupThreadID) +{ + //CHECK: [[t1:%[0-9]+]] = OpLoad %type_1d_image %t1 + //CHECK-NEXT: [[ss1:%[0-9]+]] = OpLoad %type_sampler %ss + //CHECK-NEXT: [[si1:%[0-9]+]] = OpSampledImage %type_sampled_image [[t1]] [[ss1]] + //CHECK-NEXT: [[query1:%[0-9]+]] = OpImageQueryLod %v2float [[si1]] %float_0_5 + //CHECK-NEXT: {{%[0-9]+}} = OpCompositeExtract %float [[query1]] 0 + o[0] = t1.CalculateLevelOfDetail(ss, 0.5); +} diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl index e5954abae5..99d365b5e2 100644 --- a/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half.hlsl @@ -1,6 +1,14 @@ -// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL -// CHECK: error: 16-bit texture types not yet supported with -spirv +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. 
+// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant Buffer MyBuffer; void main(): SV_Target { } diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl new file mode 100644 index 0000000000..f29af69c1c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.half4.hlsl @@ -0,0 +1,14 @@ +// RUN: not %dxc -T ps_6_6 -E main -fcgl %s -spirv -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=VK +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv -fspv-target-env=universal1.5 -enable-16bit-types 2>&1 | FileCheck %s --check-prefix=UNIVERSAL + +// When targeting Vulkan, a 16-bit floating point buffer is not valid. +// VK: error: The sampled type for textures cannot be a floating point type smaller than 32-bits when targeting a Vulkan environment. + +// When not targeting Vulkan, we should generate the 16-bit floating point buffer. 
+// UNIVERSAL: %half = OpTypeFloat 16 +// UNIVERSAL: %type_buffer_image = OpTypeImage %half Buffer 2 0 0 1 Unknown +// UNIVERSAL: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +// UNIVERSAL: %MyBuffer = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +Buffer MyBuffer; + +void main(): SV_Target { } diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl index 9d226eb962..526bfc002c 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.global-struct-of-resource.and.array.hlsl @@ -27,6 +27,7 @@ float4 main() : SV_Target // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]] return Textures[0].Sample(TheStruct.Sampler, float2(0, 0)) // CHECK: [[tex:%[0-9]+]] = OpLoad %type_2d_image %TheStruct_Texture +// CHECK: [[smp:%[0-9]+]] = OpLoad %type_sampler %TheStruct_Sampler // CHECK: [[x:%[0-9]+]] = OpSampledImage %type_sampled_image [[tex]] [[smp]] + TheStruct.Texture.Sample(TheStruct.Sampler, float2(0, 0)); } diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl new file mode 100644 index 0000000000..f0f5c54a16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -E main -T cs_6_7 %s | FileCheck %s + +// Bug was causing alignment miss + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer rwbuf; + +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0].Get()); +} + +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad 
%{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 +// CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 +// CHECK: OpStore [[L2]] %int_1 Aligned 4 + + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl new file mode 100644 index 0000000000..fc5b9edad0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +cbuffer cbuf { + [[vk::aliased_pointer]] Globals_p bp; +} + +// CHECK: OpDecorate [[BP0:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP1:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[BP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[V4C:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK: [[GS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GS]] +// CHECK: [[TT:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGS]] +// CHECK: [[PTT:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[TT]] +// CHECK: [[PFV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[V4FLOAT]] +// CHECK: [[PPGS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGS]] +// CHECK: 
[[PBV4FLOAT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] + +void f([[vk::aliased_pointer]] Globals_p bp) { +} + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = float4(1.0,0.0,0.0,0.0); + [[vk::aliased_pointer]] Globals_p bp0 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + [[vk::aliased_pointer]] Globals_p bp1 = Globals_p(g_PushConstants.m_nBufferDeviceAddress); + bp0.Get().g_vTestFloat4 = vTest; + f(bp0); + return bp1.Get().g_vTestFloat4; // Returns float4(1.0,0.0,0.0,0.0) +} + +// CHECK: [[GP:%[_0-9A-Za-z]*]] = OpVariable [[PTT]] PushConstant +// CHECK: [[VTEST:%[0-9A-Za-z]*]] = OpVariable [[PFV4FLOAT]] Function +// CHECK: OpStore [[VTEST]] [[V4C]] +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad %_ptr_PhysicalStorageBuffer_Globals_s [[X1]] +// CHECK: OpStore [[BP0]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGS]] [[GP]] [[I0]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] +// CHECK: OpStore [[BP1]] [[X4]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] +// CHECK: OpStore [[X7]] [[X5]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 +// CHECK: OpReturnValue [[X10]] + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl new file mode 100644 index 0000000000..992d8b39fd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -spirv -fcgl -T ps_6_0 %s | FileCheck %s + +struct S { + uint u; +}; + +typedef vk::BufferPointer BP; + +struct PC { + BP bp; +}; + 
+[[vk::push_constant]] PC pc; + +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[S:%[_0-9A-Za-z]*]] = OpTypeStruct [[UINT]] +// CHECK: [[PS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[S]] +// CHECK: [[PU:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} PushConstant + +void main() +{ +// CHECK: [[IN:%[_0-9A-Za-z]*]] = OpVariable +// CHECK: [[OUT:%[_0-9A-Za-z]*]] = OpVariable + uint u0, u1; + +// CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] +// CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] +// CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] +// CHECK: OpStore [[OUT]] [[X5]] + InterlockedExchange(pc.bp.Get().u, u0, u1); +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl new file mode 100644 index 0000000000..86cf48c41e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error1.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + float a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + float tmp = buffer.Get().a; + buffer.Get().a = tmp; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl new file mode 100644 index 
0000000000..09585a7664 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error2.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Globals_s { + float4 a; +}; + +typedef vk::BufferPointer Globals_p; +typedef vk::BufferPointer Globals_pp; + +[[vk::push_constant]] +Globals_pp bda; + +[numthreads(1, 1, 1)] +void main() { + float4 r = bda.Get().Get().a; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl new file mode 100644 index 0000000000..e803b5b754 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error3.hlsl @@ -0,0 +1,19 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + uint data = buffer.Get(); + buffer.Get() = data; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl new file mode 100644 index 0000000000..1029aa7f2e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error4.hlsl @@ -0,0 +1,18 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + uint a; +}; + +typedef vk::BufferPointer BufferContent; + +[[vk::push_constant]] +BufferContent buffer; + +[numthreads(1, 1, 1)] +void main() { + buffer.Get() = 1; +} + +// CHECK: vk::push_constant attribute cannot be used on declarations with vk::BufferPointer type + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl new file mode 100644 index 
0000000000..62bdb7f3cb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error5.hlsl @@ -0,0 +1,26 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +//[[vk::push_constant]] +//BufferContent buffer; + +RWStructuredBuffer rwbuf; + +// Wrong type in the parameter. +void foo(BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + foo(rwbuf[0]); +} + +// CHECK: no matching function for call to 'foo' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl new file mode 100644 index 0000000000..a89b286edf --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.error6.hlsl @@ -0,0 +1,23 @@ +// RUN: not %dxc -spirv -E main -T cs_6_7 %s 2>&1 | FileCheck %s + +struct Content { + int a; +}; + +typedef vk::BufferPointer BufferContent; +typedef vk::BufferPointer BufferBuffer; + +RWStructuredBuffer buf; + +void foo(const BufferContent bc) { + bc.Get().a = 1; +} + +[numthreads(1, 1, 1)] +void main() { + static BufferContent bcs = buf[0]; + static BufferBuffer bbs = (BufferContent)bcs; +} + +// CHECK: cannot initialize a variable of type 'BufferPointer' with an lvalue of type 'BufferPointer' + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl new file mode 100644 index 0000000000..b44e1eca09 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -spirv -Od -T cs_6_7 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DALIGN_16 %s | FileCheck %s +// RUN: %dxc -spirv -Od -T cs_6_7 -DNO_PC %s | FileCheck %s + +// Was getting bogus type errors with the defined changes + +#ifdef ALIGN_16 +typedef vk::BufferPointer BufferType; +#else +typedef vk::BufferPointer 
BufferType; +#endif +#ifndef NO_PC +struct PushConstantStruct { + BufferType push_buffer; +}; +[[vk::push_constant]] PushConstantStruct push_constant; +#endif + +RWStructuredBuffer output; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK: [[I0:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[PPUINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[UINT]] +// CHECK: [[PFPPUINT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[PPUINT]] +// CHECK: [[PUUINT:%[_0-9A-Za-z]*]] = OpTypePointer Uniform [[UINT]] +// CHECK: [[OUTPUT:%[_0-9A-Za-z]*]] = OpVariable %{{[_0-9A-Za-z]*}} Uniform + +[numthreads(1, 1, 1)] +void main() { + uint64_t addr = 123; + vk::BufferPointer test = vk::BufferPointer(addr); + output[0] = test.Get(); +} + +// CHECK: [[TEST:%[_0-9A-Za-z]*]] = OpVariable [[PFPPUINT]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PPUINT]] +// CHECK: OpStore [[TEST]] [[X1]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] Aligned 32 +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 4 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PUUINT]] [[OUTPUT]] [[I0]] [[U0]] +// CHECK: OpStore [[X4]] [[X3]] +// CHECK: OpReturn +// CHECK: OpFunctionEnd + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl new file mode 100644 index 0000000000..71fee1a795 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -spirv -Od -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpCapability PhysicalStorageBufferAddresses +// CHECK: OpExtension "SPV_KHR_physical_storage_buffer" +// CHECK: OpMemoryModel PhysicalStorageBuffer64 GLSL450 +// CHECK: OpEntryPoint Fragment [[MAIN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +// Forward declaration +typedef struct 
block_s block_t; +typedef vk::BufferPointer block_p; + +struct block_s +{ + float4 x; + block_p next; +}; + +struct TestPushConstant_t +{ + block_p root; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: OpDecorate [[GP:%[_0-9A-Za-z]*]] AliasedPointer +// CHECK: OpDecorate [[COPY1:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpDecorate [[COPY2:%[_0-9A-Za-z]*]] RestrictPointer +// CHECK: OpMemberDecorate [[BLOCK:%[_0-9A-Za-z]*]] 1 Offset 16 +// CHECK: OpTypeForwardPointer [[PBLOCK:%[_0-9A-Za-z]*]] PhysicalStorageBuffer +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[ULONG:%[_0-9A-Za-z]*]] = OpTypeInt 64 0 +// CHECK: [[UL0:%[_0-9A-Za-z]*]] = OpConstant [[ULONG]] 0 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F0]] [[F0]] [[F0]] [[F0]] +// CHECK: [[BLOCK]] = OpTypeStruct [[V4FLOAT]] [[PBLOCK]] +// CHECK: [[PBLOCK]] = OpTypePointer PhysicalStorageBuffer [[BLOCK]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PBLOCK]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPBLOCK0:%[_0-9A-Za-z]*]] = OpTypePointer Function %_ptr_PhysicalStorageBuffer_block_s +// CHECK: [[PPBLOCK1:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PBLOCK]] +// CHECK: [[PPBLOCK2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[PBLOCK]] +// CHECK: [[BOOL:%[_0-9A-Za-z]*]] = OpTypeBool +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +[numthreads(1,1,1)] 
+float4 MainPs(void) : SV_Target0 +{ + if (__has_feature(hlsl_vk_buffer_pointer)) { + [[vk::aliased_pointer]] block_p g_p = + vk::static_pointer_cast(g_PushConstants.root); + g_p = g_p.Get().next; + uint64_t addr = (uint64_t)g_p; + block_p copy1 = block_p(addr); + block_p copy2 = block_p(copy1); + if (addr == 0) // Null pointer test + return float4(0.0,0.0,0.0,0.0); + return g_p.Get().x; + } + return float4(0.0,0.0,0.0,0.0); +} + +// CHECK: [[MAIN]] = OpFunction +// CHECK-NEXT: OpLabel +// CHECK-NEXT: [[RESULT:%[_0-9A-Za-z]*]] = OpFunctionCall [[V4FLOAT]] [[FUN:%[_0-9A-Za-z]*]] +// CHECK: OpStore [[OUT]] [[RESULT]] +// CHECK: OpFunctionEnd +// CHECK: [[FUN]] = OpFunction [[V4FLOAT]] +// CHECK: [[GP]] = OpVariable [[PPBLOCK0]] Function +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] +// CHECK: OpStore [[GP]] [[X2]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: OpStore [[GP]] [[X5]] +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] +// CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] +// CHECK: OpStore [[ADDR:%[_0-9A-Za-z]*]] [[X7]] +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X9:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PBLOCK]] [[X8]] +// CHECK: OpStore [[COPY1]] [[X9]] +// CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[COPY1]] +// CHECK: OpStore [[COPY2]] [[X10]] +// CHECK: [[X11:%[_0-9A-Za-z]*]] = OpLoad [[ULONG]] [[ADDR]] +// CHECK: [[X12:%[_0-9A-Za-z]*]] = OpIEqual %bool [[X11]] [[UL0]] +// CHECK: OpBranchConditional [[X12]] [[IF_TRUE:%[_0-9A-Za-z]*]] [[IF_MERGE:%[_0-9A-Za-z]*]] +// CHECK: [[IF_TRUE]] = OpLabel +// CHECK: OpReturnValue [[CV4FLOAT]] +// CHECK: [[IF_MERGE]] = OpLabel +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] 
Aligned 32 +// CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] +// CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 +// CHECK: OpReturnValue [[X15]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl new file mode 100644 index 0000000000..c7d6f0ed2b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = 
g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 +// CHECK: OpStore [[OUT]] [[X4]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl new file mode 100644 index 0000000000..930770cc16 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl @@ -0,0 +1,35 @@ +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s + +// Issue #7302: implicit object argument of Get() evaluates to rvalue + +template +[[vk::ext_instruction(/*spv::OpBitcast*/124)]] +T bitcast(U); + +struct Content +{ + int a; +}; + +// CHECK: [[INT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I1:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[IO:%[_0-9A-Za-z]*]] = OpConstant [[INT]] 0 +// CHECK: [[UINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[UDEADBEEF:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 3735928559 +// CHECK-DAG: [[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK: [[V2UINT:%[_0-9A-Za-z]*]] = OpTypeVector [[UINT]] 2 +// CHECK: [[VECTOR:%[_0-9A-Za-z]*]] = OpConstantComposite [[V2UINT]] [[UDEADBEEF]] [[U0]] +// CHECK: [[CONTENT:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] +// CHECK: [[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT]] +// CHECK: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] + +[numthreads(1, 1, 1)] +void main() +{ + bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get().a = 1; +} + +// CHECK: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// CHECK: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] +// CHECK: OpStore 
[[PTR]] [[I1]] Aligned 4 + diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl new file mode 100644 index 0000000000..b2efd02cbd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -spirv -T ps_6_0 -E MainPs %s | FileCheck %s + +// CHECK: OpEntryPoint Fragment [[FUN:%[_0-9A-Za-z]*]] "MainPs" [[OUT:%[_0-9A-Za-z]*]] + +struct Globals_s +{ + float4 g_vSomeConstantA; + float4 g_vTestFloat4; + float4 g_vSomeConstantB; +}; + +typedef vk::BufferPointer Globals_p; + +struct TestPushConstant_t +{ + Globals_p m_nBufferDeviceAddress; +}; + +[[vk::push_constant]] TestPushConstant_t g_PushConstants; + +// CHECK: [[FLOAT:%[_0-9A-Za-z]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F0:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 0 +// CHECK-DAG: [[F1:%[_0-9A-Za-z]*]] = OpConstant [[FLOAT]] 1 +// CHECK: [[V4FLOAT:%[_0-9A-Za-z]*]] = OpTypeVector [[FLOAT]] 4 +// CHECK-DAG: [[CV4FLOAT:%[_0-9A-Za-z]*]] = OpConstantComposite [[V4FLOAT]] [[F1]] [[F0]] [[F0]] [[F0]] +// CHECK: [[SINT:%[_0-9A-Za-z]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 0 +// CHECK-DAG: [[S1:%[_0-9A-Za-z]*]] = OpConstant [[SINT]] 1 +// CHECK: [[GLOBALS:%[_0-9A-Za-z]*]] = OpTypeStruct [[V4FLOAT]] [[V4FLOAT]] [[V4FLOAT]] +// CHECK: [[PGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[GLOBALS]] +// CHECK: [[PC:%[_0-9A-Za-z]*]] = OpTypeStruct [[PGLOBALS]] +// CHECK: [[PPC:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PC]] +// CHECK: [[PV4FLOAT1:%[_0-9A-Za-z]*]] = OpTypePointer Output [[V4FLOAT]] +// CHECK: [[PPGLOBALS:%[_0-9A-Za-z]*]] = OpTypePointer PushConstant [[PGLOBALS]] +// CHECK: [[PV4FLOAT2:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[V4FLOAT]] +// CHECK: [[GPC:%[_0-9A-Za-z]*]] = OpVariable [[PPC]] PushConstant +// CHECK-DAG: [[OUT]] = OpVariable [[PV4FLOAT1]] Output + +float4 MainPs(void) : SV_Target0 +{ + float4 vTest = 
float4(1.0,0.0,0.0,0.0); + g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + return vTest; +} + +// CHECK: [[FUN]] = OpFunction +// CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPGLOBALS]] [[GPC]] [[S0]] +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] +// CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: OpStore [[OUT]] [[CV4FLOAT]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl new file mode 100644 index 0000000000..77c0f51911 --- /dev/null +++ b/tools/clang/test/DXC/FinishCodeGen/unreachable-discard.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc /T ps_6_5 -fcgl %s | FileCheck %s + +// Compiling this HLSL would trigger an assertion: +// While deleting: void (i32, float)* %dx.hl.op..void (i32, float) +// Use still stuck around after Def is destroyed: call void @"dx.hl.op..void (i32, float)"(i32 120, float -1.000000e+00), !dbg <0x503000001cc8> +// Error: assert(use_empty() && "Uses remain when a value is destroyed!") +// File: /src/external/DirectXShaderCompiler/lib/IR/Value.cpp(83) +// +// Bug was fixed in CodeGenFunction::EmitDiscardStmt by skipping the emission of +// an unreachable discard. 
+ +// CHECK: define void @main() +// CHECK: br label % +// CHECK-NOT: call void @"dx.hl.op..void (i32, float)" +// CHECK: ret void + +void main() { + while (true) { + } + discard; +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll new file mode 100644 index 0000000000..ab86452b17 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/LowerAllocateRayQuery2.ll @@ -0,0 +1,118 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; generated the IR with: +; ExtractIRForPassTest.py -p dxilgen -o LowerAllocateRayQuery2.ll tools\clang\test\CodeGenDXIL\hlsl\objects\RayQuery\allocateRayQuery2.hlsl -- -T vs_6_9 +; Importantly, extraction took place with spirv code-gen enabled + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<1024, 1>" = type { i32 } +%"class.RayQuery<1, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) 
#1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +define void @main(<3 x float>, float, <3 x float>, float) #0 { +entry: + ; CHECK: call i32 @dx.op.allocateRayQuery2(i32 258, i32 1024, i32 1) + %rayQuery12 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1024, i32 1), !dbg !42 ; line:15 col:79 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !46 ; line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !46 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !46 ; line:17 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery12, %dx.types.Handle %6, i32 1024, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !46 ; line:17 col:3 + + ; CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 1) + %rayQuery23 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 1, i32 0), !dbg !47 ; line:21 col:35 + %7 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !48 ; line:22 col:3 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %7), !dbg !48 ; line:22 col:3 + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !48 ; line:22 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery23, %dx.types.Handle %9, i32 0, i32 2, <3 x float> %0, float %1, <3 x float> %2, float %3), !dbg !48 ; line:22 col:3 + ret void, !dbg !49 ; line:23 col:1 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !21} +!dx.entryPoints = !{!34} +!dx.fnprops = !{!39} +!dx.options = !{!40, !41} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4853 (lowerOMM, ca5df957eb33-dirty)"} +!3 = !{i32 1, i32 9} +!4 = !{!"vs", i32 6, i32 9} +!5 = !{i32 0, %struct.RayDesc undef, !6, %"class.RayQuery<1024, 1>" undef, !11, %"class.RayQuery<1, 0>" undef, !17} +!6 = !{i32 32, !7, !8, !9, !10} +!7 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!8 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!9 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!10 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!11 = !{i32 4, !12, !13} +!12 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!13 = !{i32 0, !14} +!14 = !{!15, !16} +!15 = !{i32 1, i64 1024} +!16 = !{i32 1, i64 1} +!17 = !{i32 4, !12, !18} +!18 = !{i32 0, !19} +!19 = !{!16, !20} +!20 = !{i32 1, i64 0} +!21 = !{i32 1, void (<3 x float>, float, <3 x float>, float)* @main, !22} +!22 = !{!23, !25, !28, !30, !32} 
+!23 = !{i32 0, !24, !24} +!24 = !{} +!25 = !{i32 0, !26, !27} +!26 = !{i32 4, !"RAYDESC", i32 7, i32 9} +!27 = !{i32 0} +!28 = !{i32 0, !26, !29} +!29 = !{i32 1} +!30 = !{i32 0, !26, !31} +!31 = !{i32 2} +!32 = !{i32 0, !26, !33} +!33 = !{i32 3} +!34 = !{void (<3 x float>, float, <3 x float>, float)* @main, !"main", null, !35, null} +!35 = !{!36, null, null, null} +!36 = !{!37} +!37 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !38} +!38 = !{i32 0, i32 4} +!39 = !{void (<3 x float>, float, <3 x float>, float)* @main, i32 1} +!40 = !{i32 -2147483584} +!41 = !{i32 -1} +!42 = !DILocation(line: 15, column: 79, scope: !43) +!43 = !DISubprogram(name: "main", scope: !44, file: !44, line: 11, type: !45, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void (<3 x float>, float, <3 x float>, float)* @main) +!44 = !DIFile(filename: "tools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CRayQuery\5CallocateRayQuery2.hlsl", directory: "") +!45 = !DISubroutineType(types: !24) +!46 = !DILocation(line: 17, column: 3, scope: !43) +!47 = !DILocation(line: 21, column: 35, scope: !43) +!48 = !DILocation(line: 22, column: 3, scope: !43) +!49 = !DILocation(line: 23, column: 1, scope: !43) diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll new file mode 100644 index 0000000000..17a968675f --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_dxilgen.ll @@ -0,0 +1,100 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals 
cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !24 ; line:10 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !24 ; line:10 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !24 ; line:10 col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !24 ; line:10 col:3 + %4 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !25 ; line:11 col:1 + call void @llvm.lifetime.end(i64 4, i8* %4) #0, !dbg !25 ; line:11 col:1 + ret void, !dbg !25 ; line:11 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, 
%dx.types.HitObject*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 (ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 1, scope: !20) diff --git a/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll new file mode 100644 index 0000000000..ca25b1e115 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/maybereorder_dxilgen.ll @@ -0,0 +1,105 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x 
i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:9 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !19 ; line:9 col:3 +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:9 col:17 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %hit), !dbg !24 ; line:10 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32 359, %dx.types.HitObject* %hit, i32 241, i32 3), !dbg !25 ; line:11 col:3 + call void @"dx.hl.op..void (i32, i32, i32)"(i32 359, i32 242, i32 7), !dbg !26 ; line:12 col:3 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !27 ; line:13 col:1 + call void @llvm.lifetime.end(i64 4, i8* %2) #0, !dbg !27 ; line:13 col:1 + ret void, !dbg !27 ; line:13 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void 
(i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32)"(i32, %dx.types.HitObject*, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4840 ser_patch_1 9ffd030b1)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, null, !14, null} +!14 = !{!15} +!15 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 9, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 8, type: !22, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/maybereorder.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 9, column: 17, scope: !20) +!24 = !DILocation(line: 10, column: 3, scope: !20) +!25 = !DILocation(line: 11, column: 3, scope: !20) +!26 = !DILocation(line: 12, column: 3, scope: !20) +!27 = !DILocation(line: 13, column: 1, scope: !20) diff --git 
a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll new file mode 100644 index 0000000000..89ee886c2e --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll @@ -0,0 +1,142 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%dx.types.HitObject = type { i8* } +%"class.dx::HitObject" = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %tmp = alloca %dx.types.HitObject, align 4 + %ray = alloca %struct.RayDesc, align 4 +; CHECK-NOT: %{{[^ ]+}} = alloca %struct.RayDesc + %tmp2 = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT0:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT1:[^ ]+]] = alloca %dx.types.HitObject, align 4 +; CHECK: %[[HIT2:[^ ]+]] = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !23 ; line:42 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !23 ; line:42 col:3 +; CHECK: %[[THIS0:[^ ]+]] = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT0]]) +; CHECK-NOT: %[[THIS0]] + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, 
%dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !27 ; line:42 col:17 + %2 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !28 ; line:43 col:3 +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT1]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %tmp), !dbg !28 ; line:43 col:3 + %3 = bitcast %dx.types.HitObject* %tmp to i8*, !dbg !28 ; line:43 col:3 + call void @llvm.lifetime.end(i64 4, i8* %3) #0, !dbg !28 ; line:43 col:3 + %4 = bitcast %struct.RayDesc* %ray to i8*, !dbg !29 ; line:44 col:3 + call void @llvm.lifetime.start(i64 32, i8* %4) #0, !dbg !29 ; line:44 col:3 + %5 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 0, !dbg !30 ; line:44 col:17 + store <3 x float> zeroinitializer, <3 x float>* %5, !dbg !30 ; line:44 col:17 + %6 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 1, !dbg !30 ; line:44 col:17 + store float 0.000000e+00, float* %6, !dbg !30 ; line:44 col:17 + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 2, !dbg !30 ; line:44 col:17 + store <3 x float> , <3 x float>* %7, !dbg !30 ; line:44 col:17 + %8 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 3, !dbg !30 ; line:44 col:17 + store float 1.000000e+03, float* %8, !dbg !30 ; line:44 col:17 + %9 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.start(i64 4, i8* %9) #0, !dbg !31 ; line:45 col:3 +; CHECK: store <3 x float> zeroinitializer, <3 x float>* %[[pRDO:[^ ]+]], +; CHECK: store float 0.000000e+00, float* %[[pRDTMIN:[^ ]+]], +; CHECK: store <3 x float> , <3 x float>* %[[pRDD:[^ ]+]], +; CHECK: store float 1.000000e+03, float* %[[pRDTMAX:[^ ]+]], +; CHECK-DAG: %[[RDO:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDO]], +; CHECK-DAG: %[[RDTMIN:[^ ]+]] = 
load float, float* %[[pRDTMIN]], +; CHECK-DAG: %[[RDD:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD]], +; CHECK-DAG: %[[RDTMAX:[^ ]+]] = load float, float* %[[pRDTMAX]], +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO]], float %[[RDTMIN]], <3 x float> %[[RDD]], float %[[RDTMAX]]) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %tmp2, i32 0, i32 1, %struct.RayDesc* %ray), !dbg !31 ; line:45 col:3 + %10 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 + call void @llvm.lifetime.end(i64 4, i8* %10) #0, !dbg !31 ; line:45 col:3 + %11 = bitcast %struct.RayDesc* %ray to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 32, i8* %11) #0, !dbg !32 ; line:46 col:1 + %12 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !32 ; line:46 col:1 + call void @llvm.lifetime.end(i64 4, i8* %12) #0, !dbg !32 ; line:46 col:1 + ret void, !dbg !32 ; line:46 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*) #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !12} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 
2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.dx::HitObject" undef, !5, %struct.RayDesc undef, !7} +!5 = !{i32 4, !6} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 1, void ()* @"\01?main@@YAXXZ", !13} +!13 = !{!14} +!14 = !{i32 1, !15, !15} +!15 = !{} +!16 = !{null, !"", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!20 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!21 = !{i32 -2147483584} +!22 = !{i32 -1} +!23 = !DILocation(line: 42, column: 3, scope: !24) +!24 = !DISubprogram(name: "main", scope: !25, file: !25, line: 41, type: !26, isLocal: false, isDefinition: true, scopeLine: 41, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!25 = !DIFile(filename: "tools/clang/test/HLSLFileCheck/hlsl/objects/HitObject/hitobject_make_ast.hlsl", directory: "") +!26 = !DISubroutineType(types: !15) +!27 = !DILocation(line: 42, column: 17, scope: !24) +!28 = !DILocation(line: 43, column: 3, scope: !24) +!29 = !DILocation(line: 44, column: 3, scope: !24) +!30 = !DILocation(line: 44, column: 17, scope: !24) +!31 = !DILocation(line: 45, column: 3, scope: !24) +!32 = !DILocation(line: 46, column: 1, scope: !24) \ No newline at end of file diff --git a/tools/clang/test/DXC/metal.test b/tools/clang/test/DXC/metal.test new file mode 100644 index 0000000000..3d00850abc --- /dev/null +++ b/tools/clang/test/DXC/metal.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// Metal libraries are LLVM bitcode. 
This check inspects the magic number from +// the metal library output. +// RUN: %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal +// RUN: head -c 4 Tmp.metal | FileCheck -check-prefix=MTL %s +// MTL: {{^MTLB}} diff --git a/tools/clang/test/DXC/no_metal.test b/tools/clang/test/DXC/no_metal.test new file mode 100644 index 0000000000..37af16cad5 --- /dev/null +++ b/tools/clang/test/DXC/no_metal.test @@ -0,0 +1,4 @@ +// UNSUPPORTED: metal + +// RUN:not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK:Metal CodeGen not available diff --git a/tools/clang/test/DXC/no_metal_disassembly.test b/tools/clang/test/DXC/no_metal_disassembly.test new file mode 100644 index 0000000000..44283a8fe8 --- /dev/null +++ b/tools/clang/test/DXC/no_metal_disassembly.test @@ -0,0 +1,7 @@ +// REQUIRES: metal + +// These cases both fail because the shader converter library cannot emit +// textual IR. +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal -Fo Tmp.metal -Fc Tmp.air 2>&1 | FileCheck %s +// RUN: not %dxc %S/Inputs/smoke.hlsl /T ps_6_0 -metal 2>&1 | FileCheck %s +// CHECK: Disassembly of Metal IR not supported (yet). diff --git a/tools/clang/test/DXILValidation/load-store-validation.hlsl b/tools/clang/test/DXILValidation/load-store-validation.hlsl new file mode 100644 index 0000000000..d4e5e29db8 --- /dev/null +++ b/tools/clang/test/DXILValidation/load-store-validation.hlsl @@ -0,0 +1,74 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of various invalid load/store parameters. +// It is used to generate LitDxilValidation/load-store-validation.ll using `dxc -T ps_6_9`. +// Output is modified to trigger various validation errors. 
+ +Texture1D Tex; +RWTexture1D RwTex; +SamplerState Samp; + +StructuredBuffer VecBuf; +StructuredBuffer ScalBuf; +ByteAddressBuffer BaBuf; + +RWStructuredBuffer OutVecBuf; +RWStructuredBuffer OutScalBuf; +RWByteAddressBuffer OutBaBuf; + +// Some simple ways to generate the vector ops in question. +float4 main(int i : IX) : SV_Target { + // Texture provides some invalid handles to plug in. + float4 TexVal = Tex.Sample(Samp, i); + RwTex[0] = TexVal; + + // For invalid RC on Load (and inevitably invalid RK). + float BadRCLd = ScalBuf[0]; + // For invalid RK on Load. + float BadRKLd = ScalBuf[1]; + // For non-constant alignment on Load. + float BadAlnLd = ScalBuf[2]; + // For undefined offset on Structured Buffer Load. + float BadStrOffLd = ScalBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Load. + float BadBabOffLd = BaBuf.Load(0); + + // For invalid RC on Vector Load (and inevitably invalid RK). + float4 BadRCVcLd = VecBuf[0]; + // For invalid RK on Vector Load. + float4 BadRKVcLd = VecBuf[1]; + // For non-constant alignment on Vector Load. + float4 BadAlnVcLd = VecBuf[2]; + // For undefined offset on Structured Buffer Vector Load. + float4 BadStrOffVcLd = VecBuf[3]; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + float4 BadBabOffVcLd = BaBuf.Load(4); + + // For Store to non-UAV. + OutScalBuf[0] = BadRCLd; + // For invalid RK on Store. + OutScalBuf[1] = BadRKLd; + // For non-constant alignment on Store. + OutScalBuf[2] = BadAlnLd; + // For undefined offset on Structured Buffer Store. + OutScalBuf[3] = BadStrOffLd; + // For undefined value Store. + OutScalBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Store. + OutBaBuf.Store(0, BadBabOffLd); + + // For Vector Store to non-UAV. + OutVecBuf[0] = BadRCVcLd; + // For invalid RK on Vector Store. + OutVecBuf[1] = BadRKVcLd; + // For non-constant alignment on Vector Store. 
+ OutVecBuf[2] = BadAlnVcLd; + // For undefined offset on Structured Buffer Vector Store. + OutVecBuf[3] = BadStrOffVcLd; + // For undefinded value Vector Store. + OutVecBuf[4] = 77; + // For defined (and therefore invalid) offset on Byte Address Buffer Vector Store. + OutBaBuf.Store(4, BadBabOffVcLd); + + return TexVal; +} + diff --git a/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll new file mode 100644 index 0000000000..88b71ff3e0 --- /dev/null +++ b/tools/clang/test/DXILValidation/ser_hitobject_make_passing.ll @@ -0,0 +1,46 @@ +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + ; Test HitObject_MakeMiss (opcode 265) + %r265 = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + + ; Test HitObject_MakeNop (opcode 266) + %r266 = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32, i32, i32, float, float, float, float, float, float, float, float) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} 
+!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!9 = !{null, !"", null, null, !10} +!10 = !{i32 0, i64 0} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/DXILValidation/vector-validation.hlsl b/tools/clang/test/DXILValidation/vector-validation.hlsl new file mode 100644 index 0000000000..5d6a5cd4a2 --- /dev/null +++ b/tools/clang/test/DXILValidation/vector-validation.hlsl @@ -0,0 +1,14 @@ +// This file is not used directly for testing. +// This is the HLSL source for validation of disallowed 6.9 features in previous shader models. +// It is used to generate LitDxilValidation/vector-validation.ll using `dxc -T ps_6_9`. +// Output is modified to have shader model 6.8 instead. + +RWStructuredBuffer VecBuf; + +// some simple ways to generate the vector ops in question. +float4 main(float val : VAL) :SV_Position { + float4 vec = VecBuf[1]; + VecBuf[0] = val; + return vec[2]; +} + diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl new file mode 100644 index 0000000000..44424f5d14 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/raytracingpipelineconfig1.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | FileCheck %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s + + +// CHECK: ; RaytracingPipelineConfig1 rpc = { MaxTraceRecursionDepth = 32, Flags = RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +// AST: TranslationUnitDecl 0x{{.+}} <> +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: 
IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' 'const unsigned int' +// ASTIMPL: VarDecl 0x{{.+}} <> implicit referenced RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// ASTIMPL-NEXT: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// ASTIMPL-NEXT: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl new file mode 100644 index 0000000000..6cedf44e20 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/rdat_mintarget/sm69_barriers.hlsl @@ -0,0 +1,53 @@ +// RUN: %dxilver 1.9 | %dxc -T lib_6_9 %s | %D3DReflect %s | %FileCheck %s -check-prefixes=RDAT + +// Check that stage flags are set correctly still for different barrier modes in SM 6.9. 
+ +// RDAT: FunctionTable[{{.*}}] = { + +RWByteAddressBuffer BAB : register(u1, space0); + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder() { + Barrier(UAV_MEMORY, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "fn_barrier_reorder2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (Library | RayGeneration) +// RDAT: MinShaderTarget: 0x60069 + +[noinline] export +void fn_barrier_reorder2() { + Barrier(BAB, REORDER_SCOPE); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call() { + fn_barrier_reorder(); + BAB.Store(0, 0); +} + +// RDAT-LABEL: UnmangledName: "rg_barrier_reorder_in_call2" +// RDAT: FeatureInfo1: 0 +// RDAT: FeatureInfo2: 0 +// RDAT: ShaderStageFlag: (RayGeneration) +// RDAT: MinShaderTarget: 0x70069 + +[shader("raygeneration")] +void rg_barrier_reorder_in_call2() { + fn_barrier_reorder2(); + BAB.Store(0, 0); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl index 33086852ab..5443ada0c9 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl @@ -15,6 +15,7 @@ // ext_vector array. 
// CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class matrix definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLMatrixAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element [row_count] __attribute__((ext_vector_type(col_count)))' diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl index 0ad236a4b2..12859b7eda 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl @@ -12,6 +12,7 @@ // Verify the class, final attribute and ext_vector field decl. // CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class vector definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLVectorAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element __attribute__((ext_vector_type(element_count)))' // Verify operator overloads for const vector subscript operators. 
diff --git a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll index 35fd0d6b1d..d5b0bbb2a7 100644 --- a/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll +++ b/tools/clang/test/HLSLFileCheck/passes/dxil/lower_type/vec_array_param.ll @@ -30,4 +30,3 @@ entry: declare float @"\01?foo@@YAMY02V?$vector@M$02@@@Z"([3 x <3 x float>]*) attributes #0 = { nounwind } - diff --git a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll new file mode 100644 index 0000000000..cab9942b02 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll @@ -0,0 +1,68 @@ +; RUN: %dxilver 1.9 | %dxv %s + +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; BAB UAV byte r/w U0 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?BAB@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A", align 4 + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) ; BarrierByMemoryType(MemoryTypeFlags,SemanticFlags) + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: 
RWByteAddressBuffer + call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8) ; BarrierByMemoryHandle(object,SemanticFlags) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32) #1 + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryHandle(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #3 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!5} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"BAB", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!5 = !{i32 1, void ()* @"\01?main@@YAXXZ", !6} +!6 = !{!7} +!7 = !{i32 1, !8, !8} +!8 = !{} +!9 = !{null, !"", null, !2, !10} +!10 = !{i32 0, i64 8589934608} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll new file mode 100644 index 0000000000..34b2f6b602 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -0,0 +1,229 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Ensure proper validation errors are produced for invalid parameters to load and store operations. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.f32 = type { float, float, float, float, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.Texture1D >" = type { <4 x float>, %"class.Texture1D >::mips_type" } +%"class.Texture1D >::mips_type" = type { i32 } +%"class.StructuredBuffer >" = type { <4 x float> } +%"class.StructuredBuffer" = type { float } +%struct.ByteAddressBuffer = type { i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%struct.SamplerState = type { i32 } + +; Unfortunately, the validation errors come in weird orders. +; Inlining them isn't helpful, so we'll just dump them all here. +; Inline comments, variable names, and notes should help find the corresponding source. + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at '%badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. 
+; CHECK-NEXT: note: at '%badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4)' +; CHECK: error: Assignment of undefined values to UAV. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4)' +; CHECK: error: store should be on uav resource. +; CHECK-NEXT: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4)' + +; CHECK: error: raw/typed buffer offset must be undef. +; CHECK-NEXT: note: at '%badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4)' +; CHECK: error: structured buffer requires defined index and offset coordinates. +; CHECK-NEXT: note: at '%badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4)' +; CHECK: error: Raw Buffer alignment value must be a constant. 
+; CHECK-NEXT: note: at '%badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix)' +; CHECK: error: buffer load/store only works on Raw/Typed/StructuredBuffer +; CHECK-NEXT: note: at '%badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4)' +; CHECK: error: load can only run on UAV/SRV resource. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' +; CHECK-NEXT: error: buffer load/store only works on Raw/Typed/StructuredBuffer. +; CHECK-NEXT: note: at '%badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4)' + +define void @main() { +bb: + %tmp = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + %tmp1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + %tmp2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %tmp3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + %tmp4 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + %tmp5 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + %tmp6 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) + %tmp7 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 3 }, i32 0, i1 false) + %tmp8 = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 0, i1 false) + %ix = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %texIx = sitofp i32 %ix to float + %tex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 1, i32 1033 }) + %samp = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp7, %dx.types.ResourceProperties { i32 14, i32 0 }) + %tmp10 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60, %dx.types.Handle %tex, %dx.types.Handle %samp, float %texIx, float undef, float undef, float undef, i32 0, i32 undef, i32 undef, float undef) + %tmp11 = extractvalue %dx.types.ResRet.f32 %tmp10, 0 + %tmp12 = extractvalue %dx.types.ResRet.f32 %tmp10, 1 + %tmp13 = extractvalue %dx.types.ResRet.f32 %tmp10, 2 + %tmp14 = extractvalue %dx.types.ResRet.f32 %tmp10, 3 + %rwTex = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp8, %dx.types.ResourceProperties { i32 4097, i32 1033 }) + call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %rwTex, i32 0, i32 undef, i32 undef, float %tmp11, float %tmp12, float %tmp13, float %tmp14, i8 15) + %scalBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp4, %dx.types.ResourceProperties { i32 12, i32 4 }) + ; Invalid RC on Load (and inevitably invalid RK). + %badRCLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %samp, i32 0, i32 0, i8 1, i32 4) + %badRC = extractvalue %dx.types.ResRet.f32 %badRCLd, 0 + ; Invalid RK on Load. + %badRKLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %tex, i32 1, i32 0, i8 1, i32 4) + %badRK = extractvalue %dx.types.ResRet.f32 %badRKLd, 0 + ; Non-constant alignment on Load. 
+ %badAlnLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 2, i32 0, i8 1, i32 %ix) + %badAln = extractvalue %dx.types.ResRet.f32 %badAlnLd, 0 + ; Undefined offset on Structured Buffer Load. + %badStrOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %scalBuf, i32 3, i32 undef, i8 1, i32 4) + %badStrOff = extractvalue %dx.types.ResRet.f32 %badStrOffLd, 0 + %baBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp3, %dx.types.ResourceProperties { i32 11, i32 0 }) + ; Defined (and therefore invalid) offset on Byte Address Buffer Load. + %badBabOffLd = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %baBuf, i32 0, i32 0, i8 1, i32 4) + %badBabOff = extractvalue %dx.types.ResRet.f32 %badBabOffLd, 0 + + %vecBuf = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp5, %dx.types.ResourceProperties { i32 12, i32 16 }) + ; Invalid RC on Vector Load (and inevitably invalid RK). + %badRCVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %samp, i32 0, i32 0, i32 4) + %badRCVc = extractvalue %dx.types.ResRet.v4f32 %badRCVcLd, 0 + ; Invalid RK on Vector Load. + %badRKVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %tex, i32 1, i32 0, i32 4) + %badRKVc = extractvalue %dx.types.ResRet.v4f32 %badRKVcLd, 0 + ; Non-constant alignment on Vector Load. + %badAlnVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 2, i32 0, i32 %ix) + %badAlnVc = extractvalue %dx.types.ResRet.v4f32 %badAlnVcLd, 0 + ; Undefined offset on Structured Buffer Vector Load. 
+ %badStrOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %vecBuf, i32 3, i32 undef, i32 4) + %badStrOffVc = extractvalue %dx.types.ResRet.v4f32 %badStrOffVcLd, 0 + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Load. + %badBabOffVcLd = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %baBuf, i32 4, i32 0, i32 4) + %badBabOffVc = extractvalue %dx.types.ResRet.v4f32 %badBabOffVcLd, 0 + + ; Store to non-UAV. + %tmp38 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %scalBuf, i32 0, i32 0, float %badRC, float undef, float undef, float undef, i8 1, i32 4) + ; Invalid RK on Store. + %tmp39 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %rwTex, i32 1, i32 0, float %badRK, float undef, float undef, float undef, i8 1, i32 4) + ; Non-constant alignment on Store. + %tmp40 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp40, i32 2, i32 0, float %badAln, float undef, float undef, float undef, i8 1, i32 %ix) + ; Undefined offset on Structured Buffer Store. + %tmp41 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp41, i32 3, i32 undef, float %badStrOff, float undef, float undef, float undef, i8 1, i32 4) + ; Undefined value Store. 
+ %tmp42 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp1, %dx.types.ResourceProperties { i32 4108, i32 4 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp42, i32 4, i32 0, float undef, float undef, float undef, float undef, i8 1, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Store. + %tmp44 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %tmp44, i32 0, i32 0, float %badBabOff, float undef, float undef, float undef, i8 1, i32 4) + + ; Vector Store to non-UAV. + %tmp45 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %rwTex, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %vecBuf, i32 0, i32 0, <4 x float> %badRCVc, i32 4) + ; Invalid RK on Vector Store. + %tmp46 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %rwTex, i32 1, i32 0, <4 x float> %badRKVc, i32 4) + ; Non-constant alignment on Vector Store. + %tmp47 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp47, i32 2, i32 0, <4 x float> %badAlnVc, i32 %ix) + ; Undefined offset on Structured Buffer Vector Store. + %tmp48 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp48, i32 3, i32 undef, <4 x float> %badStrOffVc, i32 4) + ; Undefined value Vector Store. 
+ %tmp49 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 4108, i32 16 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp49, i32 4, i32 0, <4 x float> undef, i32 4) + ; Defined (and therefore invalid) offset on Byte Address Buffer Vector Store. + %tmp51 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %tmp, %dx.types.ResourceProperties { i32 4107, i32 0 }) + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %tmp51, i32 4, i32 0, <4 x float> %badBabOffVc, i32 4) + + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %tmp11) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %tmp12) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %tmp13) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %tmp14) + ret void +} + +declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #2 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0 +declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #1 +declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 +declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32, %dx.types.Handle, i32, i32, i8, i32) #1 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #0 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } 
+attributes #2 = { nounwind readnone } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!18} +!dx.entryPoints = !{!19} + +!1 = !{i32 1, i32 9} +!2 = !{!"ps", i32 6, i32 9} +!3 = !{!4, !12, null, !16} +!4 = !{!5, !7, !9, !11} +!5 = !{i32 0, %"class.Texture1D >"* undef, !"", i32 0, i32 0, i32 1, i32 1, i32 0, !6} +!6 = !{i32 0, i32 9} +!7 = !{i32 1, %"class.StructuredBuffer >"* undef, !"", i32 0, i32 1, i32 1, i32 12, i32 0, !8} +!8 = !{i32 1, i32 16} +!9 = !{i32 2, %"class.StructuredBuffer"* undef, !"", i32 0, i32 2, i32 1, i32 12, i32 0, !10} +!10 = !{i32 1, i32 4} +!11 = !{i32 3, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 3, i32 1, i32 11, i32 0, null} +!12 = !{!13, !14, !15} +!13 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !8} +!14 = !{i32 1, %"class.RWStructuredBuffer"* undef, !"", i32 0, i32 1, i32 1, i32 12, i1 false, i1 false, i1 false, !10} +!15 = !{i32 2, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 2, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{!17} +!17 = !{i32 0, %struct.SamplerState* undef, !"", i32 0, i32 0, i32 1, i32 0, null} +!18 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!19 = !{void ()* @main, !"main", !20, !3, !27} +!20 = !{!21, !24, null} +!21 = !{!22} +!22 = !{i32 0, !"IX", i8 4, i8 0, !23, i8 1, i32 1, i8 1, i32 0, i8 0, null} +!23 = !{i32 0} +!24 = !{!25} +!25 = !{i32 0, !"SV_Target", i8 9, i8 16, !23, i8 0, i32 1, i8 4, i32 0, i8 0, !26} +!26 = !{i32 3, i32 15} +!27 = !{i32 0, i64 8589934608} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll new file mode 100644 index 0000000000..e527125009 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll @@ -0,0 +1,110 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation 
succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.AttribType = type { float, float } +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %attrs = alloca %struct.AttribType, align 4 + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + %r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %nop) ; HitObject_IsMiss(hitObject) + + %r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %nop) ; HitObject_IsHit(hitObject) + + %r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %nop) ; HitObject_IsNop(hitObject) + + %r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject %nop) ; HitObject_RayFlags(hitObject) + + %r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject %nop) ; HitObject_RayTMin(hitObject) + + %r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject %nop) ; HitObject_RayTCurrent(hitObject) + + %r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayOrigin(hitObject,component) + + %r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 0) ; HitObject_WorldRayDirection(hitObject,component) + + %r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) + + %r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 0) ; HitObject_ObjectRayDirection(hitObject,component) + + %r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + + %r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 
0) ; HitObject_WorldToObject3x4(hitObject,row,col) + + %r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %nop) ; HitObject_GeometryIndex(hitObject) + + %r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %nop) ; HitObject_InstanceIndex(hitObject) + + %r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %nop) ; HitObject_InstanceID(hitObject) + + %r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %nop) ; HitObject_PrimitiveIndex(hitObject) + + %r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %nop) ; HitObject_HitKind(hitObject) + + %r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %nop) ; HitObject_ShaderTableIndex(hitObject) + + %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %nop, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + + call void @dx.op.hitObject_Attributes.struct.AttribType(i32 289, %dx.types.HitObject %nop, %struct.AttribType* nonnull %attrs) ; HitObject_Attributes(hitObject,attributes) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind readnone +declare i1 @dx.op.hitObject_StateScalar.i1(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare i32 @dx.op.hitObject_StateScalar.i32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readonly +declare i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32, %dx.types.HitObject, i32) #2 + +; Function Attrs: nounwind readnone +declare float 
@dx.op.hitObject_StateVector.f32(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind argmemonly +declare void @dx.op.hitObject_Attributes.struct.AttribType(i32, %dx.types.HitObject, %struct.AttribType*) #3 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateScalar.f32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateMatrix.f32(i32, %dx.types.HitObject, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind argmemonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!3, !4} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !5} +!3 = !{null, !"", null, null, !6} +!4 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !7} +!5 = !{!8} +!6 = !{i32 0, i64 0} +!7 = !{i32 8, i32 7, i32 5, !9} +!8 = !{i32 1, !10, !10} +!9 = !{i32 0} +!10 = !{} + diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll new file mode 100644 index 0000000000..5b0c65fd6b --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_passing.ll @@ -0,0 +1,84 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%struct.CustomAttrs = type { float, float } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.CustomAttrs, align 4 + %3 = call i32 @dx.op.allocateRayQuery(i32 178, i32 5) ; AllocateRayQuery(constRayFlags) + %4 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %5 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %3, %dx.types.Handle %5, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; RayQuery_TraceRayInline(rayQueryHandle,accelerationStructure,rayFlags,instanceInclusionMask,origin_X,origin_Y,origin_Z,tMin,direction_X,direction_Y,direction_Z,tMax) + %6 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %3) ; HitObject_FromRayQuery(rayQueryHandle) + %7 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %3, i32 16, %struct.CustomAttrs* nonnull %2) ; 
HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.allocateRayQuery(i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.rayQuery_TraceRayInline(i32, i32, %dx.types.Handle, i32, i32, float, float, float, float, float, float, float, float) #0 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32, i32, i32, %struct.CustomAttrs*) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!6} +!dx.dxrPayloadAnnotations = !{!10} +!dx.entryPoints = !{!13, !15} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, null, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, %struct.Payload undef, !11} +!11 = !{!12} +!12 = !{i32 0, i32 8210} +!13 = !{null, !"", null, !2, !14} +!14 = !{i32 0, i64 33554432} +!15 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !16} +!16 = !{i32 8, i32 7, i32 5, !17} +!17 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll 
b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll new file mode 100644 index 0000000000..f3b99300be --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_traceinvoke_passing.ll @@ -0,0 +1,68 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.Payload, align 4 + %3 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + %5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %5, %struct.Payload* 
nonnull %2) ; HitObject_Invoke(hitObject,payload) + ret void +} + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind +declare void @dx.op.hitObject_Invoke.struct.Payload(i32, %dx.types.HitObject, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.dxrPayloadAnnotations = !{!4} +!dx.entryPoints = !{!5, !6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!7, null, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !8} +!4 = !{i32 0, %struct.Payload undef, !9} +!5 = !{null, !"", null, !2, null} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !10} +!7 = !{!11} +!8 = !{!12} +!9 = !{!13} +!10 = !{i32 8, i32 7, i32 5, !14} +!11 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !15} +!12 = !{i32 1, !16, !16} +!13 = !{i32 0, i32 8210} +!14 = !{i32 0} +!15 = !{i32 0, i32 4} +!16 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll new file mode 100644 index 0000000000..4502b9241d --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_failing.ll @@ -0,0 +1,60 @@ +; REQUIRES: dxil-1-9 +; 
RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef)' + +; CHECK: Function: ?main@@YAXXZ: error: Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1)' + +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK-NEXT: note: at 'call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0)' + +; CHECK: Validation failed. + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + + ; Validate that hit object is not undef. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject undef, i32 11, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that coherence hint is not undef while numCoherenceHintBitsFromLSB is not 0. + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 1) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Validate that num coherence hint bits from LSB is not undef. 
+ call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 1, i32 undef) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll new file mode 100644 index 0000000000..8ee7677bd4 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_maybereorder_passing.ll @@ -0,0 +1,46 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s | FileCheck %s + +; CHECK: Validation succeeded. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + + ; Coherence hint disabled, accept 'undef' coherence hint bits. 
+ call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %nop, i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.maybeReorderThread(i32, %dx.types.HitObject, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!6, !8} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!6 = !{null, !"", null, null, !7} +!7 = !{i32 0, i64 0} +!8 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!9 = !{i32 8, i32 7, i32 5, !10} +!10 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/vector-validation.ll b/tools/clang/test/LitDXILValidation/vector-validation.ll new file mode 100644 index 0000000000..74e8116e88 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/vector-validation.ll @@ -0,0 +1,78 @@ +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Confirm that 6.9 specific LLVM operations and DXIL intrinsics fail in 6.8 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v4f32 = type { <4 x float>, i32 } +%"class.RWStructuredBuffer >" = type { <4 x float> } + +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%6 = insertelement <4 x float> undef, float %2, i32 0 +; CHECK: Function: main: error: Instructions must be of an allowed type. 
+; CHECK: note: at '%7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: Function: main: error: Instructions must be of an allowed type. +; CHECK: note: at '%8 = extractelement <4 x float> %5, i32 2 +; CHECK: Function: main: error: Opcode RawBufferVectorLoad not valid in shader model vs_6_8. +; CHECK: note: at '%4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8)' +; CHECK: Function: main: error: Opcode RawBufferVectorStore not valid in shader model vs_6_8. +; CHECK: note: at 'call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4)' +; CHECK: Function: main: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK: Function: main: error: Function uses features incompatible with the shader model. +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) + %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4108, i32 16 }) + %4 = call %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32 303, %dx.types.Handle %3, i32 1, i32 0, i32 8) + %5 = extractvalue %dx.types.ResRet.v4f32 %4, 0 + %6 = insertelement <4 x float> undef, float %2, i32 0 + %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer + call void @dx.op.rawBufferVectorStore.v4f32(i32 304, %dx.types.Handle %3, i32 0, i32 0, <4 x float> %7, i32 4) + %8 = extractelement <4 x float> %5, i32 2 + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, 
float %8) + call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %8) + ret void +} + +declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #0 +declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #1 +declare %dx.types.ResRet.v4f32 @dx.op.rawBufferVectorLoad.v4f32(i32, %dx.types.Handle, i32, i32, i32) #2 +declare void @dx.op.rawBufferVectorStore.v4f32(i32, %dx.types.Handle, i32, i32, <4 x float>, i32) #1 +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #0 +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind readonly } + +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.viewIdState = !{!7} +!dx.entryPoints = !{!8} + +!1 = !{i32 1, i32 8} +!2 = !{!"vs", i32 6, i32 8} +!3 = !{null, !4, null, null} +!4 = !{!5} +!5 = !{i32 0, %"class.RWStructuredBuffer >"* undef, !"", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6} +!6 = !{i32 1, i32 16} +!7 = !{[3 x i32] [i32 1, i32 4, i32 0]} +!8 = !{void ()* @main, !"main", !9, !3, !17} +!9 = !{!10, !14, null} +!10 = !{!11} +!11 = !{i32 0, !"VAL", i8 9, i8 0, !12, i8 0, i32 1, i8 1, i32 0, i8 0, !13} +!12 = !{i32 0} +!13 = !{i32 3, i32 1} +!14 = !{!15} +!15 = !{i32 0, !"SV_Position", i8 9, i8 3, !12, i8 4, i32 1, i8 4, i32 0, i8 0, !16} +!16 = !{i32 3, i32 15} +!17 = !{i32 0, i64 8590000144} + diff --git a/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl new file mode 100644 index 0000000000..53366de828 --- /dev/null +++ b/tools/clang/test/SemaHLSL/attributes/reordercoherent_ast.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck %s +// REQUIRES: dxil-1-9 + +// CHECK: |-VarDecl {{.*}} used uav1 'reordercoherent RWTexture1D':'RWTexture1D >' +// CHECK-NEXT: | 
|-HLSLReorderCoherentAttr +reordercoherent RWTexture1D uav1 : register(u3); +RWBuffer uav2; + +[shader("raygeneration")] +void main() +{ + // CHECK: | `-VarDecl {{.*}} uav3 'reordercoherent RWTexture1D':'RWTexture1D >' cinit + // CHECK-NEXT: | | + // CHECK-NEXT: | | + // CHECK-NEXT: | `-HLSLReorderCoherentAttr + reordercoherent RWTexture1D uav3 = uav1; +} diff --git a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl index 4fcce749d7..ece7e3f2f4 100644 --- a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl +++ b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl @@ -1,3 +1,4 @@ +// REQUIRES: spirv // RUN: %dxc -T ps_6_0 -E main -verify -spirv %s struct S diff --git a/tools/clang/test/SemaHLSL/const-default.hlsl b/tools/clang/test/SemaHLSL/const-default.hlsl index 2ebb6fe52e..6b5e43e0e9 100644 --- a/tools/clang/test/SemaHLSL/const-default.hlsl +++ b/tools/clang/test/SemaHLSL/const-default.hlsl @@ -33,7 +33,11 @@ class MyClass { ConstantBuffer g_const_buffer2; TextureBuffer g_texture_buffer2; +// expected-note@+2 {{forward declaration of 'FWDDeclStruct'}} +// expected-note@+1 {{forward declaration of 'FWDDeclStruct'}} struct FWDDeclStruct; +// expected-note@+2 {{forward declaration of 'FWDDeclClass'}} +// expected-note@+1 {{forward declaration of 'FWDDeclClass'}} class FWDDeclClass; // Ensure forward declared struct/class fails as expected diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl new file mode 100644 index 0000000000..fc42f99a9a --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm68_unavailable.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -Tlib_6_8 -verify %s + +[Shader("compute")] +[numthreads(1, 1, 1)] +void main() { + // expected-error@+1{{invalid SemanticFlags for 
Barrier operation; expected 0 or some combination of GROUP_SYNC, GROUP_SCOPE, DEVICE_SCOPE flags}} + Barrier(0, REORDER_SCOPE); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl new file mode 100644 index 0000000000..18271a2b11 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/barrier/reorder_scope_sm69_passing.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s + +RWByteAddressBuffer BAB : register(u1, space0); + +[shader("raygeneration")] +void main() { +// CHECK: call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) + Barrier(UAV_MEMORY, REORDER_SCOPE); + +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %{{[^ ]+}}, i32 8) + Barrier(BAB, REORDER_SCOPE); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl new file mode 100644 index 0000000000..fa3ab68506 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/hitobject_reorder.hlsl @@ -0,0 +1,10 @@ +// RUN: %dxc -T lib_6_9 -E main %s -verify + +// expected-no-diagnostics + +[shader("raygeneration")] void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl new file mode 100644 index 0000000000..3c97ea0a77 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-entry-errors.hlsl @@ -0,0 +1,62 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,closesthit,anyhit,miss) + : read(caller,closesthit,anyhit,miss); +}; + +struct Attribs { float2 barys; }; +void CallReorder() +{ +// 
expected-error@+6{{dx::MaybeReorderThread is unavailable in shader stage 'compute' (requires 'raygeneration')}} +// expected-error@+5{{dx::MaybeReorderThread is unavailable in shader stage 'callable' (requires 'raygeneration')}} +// expected-error@+4{{dx::MaybeReorderThread is unavailable in shader stage 'intersection' (requires 'raygeneration')}} +// expected-error@+3{{dx::MaybeReorderThread is unavailable in shader stage 'anyhit' (requires 'raygeneration')}} +// expected-error@+2{{dx::MaybeReorderThread is unavailable in shader stage 'closesthit' (requires 'raygeneration')}} +// expected-error@+1{{dx::MaybeReorderThread is unavailable in shader stage 'miss' (requires 'raygeneration')}} + dx::MaybeReorderThread(0,0); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainReorderCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { + CallReorder(); +} + +[shader("raygeneration")] +void mainReorderRG() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainReorderCALL(inout Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainReorderIS() { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainReorderAH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("closesthit")] +void mainReorderCH(inout Payload pld, in Attribs attrs) { + CallReorder(); +} + +// expected-note@+2{{entry function defined here}} +[shader("miss")] +void mainReorderMS(inout Payload pld) { + CallReorder(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..db2d0fd2e3 --- /dev/null +++ 
b/tools/clang/test/SemaHLSL/hlsl/intrinsics/reorder/reorder-unavailable-pre-sm69.hlsl @@ -0,0 +1,9 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// Check that dx::MaybeReorderThread is unavailable pre SM 6.9. + +[shader("raygeneration")] +void main() { + // expected-error@+1{{intrinsic dx::MaybeReorderThread potentially used by ''main'' requires shader model 6.9 or greater}} + dx::MaybeReorderThread(15u, 4u); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl new file mode 100644 index 0000000000..e23f398538 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/namespace/dx-namespace-pre-sm69.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// expected-no-diagnostics +using namespace dx; + +[shader("raygeneration")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl new file mode 100644 index 0000000000..44afcf47e7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-entry-errors.hlsl @@ -0,0 +1,58 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +dx::HitObject UseHitObject() { + return dx::HitObject::MakeNop(); +} + +// expected-note@+3{{entry function defined here}} +[shader("compute")] +[numthreads(4,4,4)] +void mainHitCS(uint ix : SV_GroupIndex, uint3 id : SV_GroupThreadID) { +// expected-error@-7{{dx::HitObject is unavailable in shader stage 'compute' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("callable")] +void mainHitCALL(inout Attribs attrs) { +// expected-error@-14{{dx::HitObject is unavailable in shader stage 'callable' (requires
'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("intersection")] +void mainHitIS() { +// expected-error@-21{{dx::HitObject is unavailable in shader stage 'intersection' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +// expected-note@+2{{entry function defined here}} +[shader("anyhit")] +void mainHitAH(inout Payload pld, in Attribs attrs) { +// expected-error@-28{{dx::HitObject is unavailable in shader stage 'anyhit' (requires 'raygeneration', 'closesthit' or 'miss')}} + UseHitObject(); +} + +[shader("raygeneration")] +void mainHitRG() { + UseHitObject(); +} + +[shader("closesthit")] +void mainHitCH(inout Payload pld, in Attribs attrs) { + UseHitObject(); +} + +[shader("miss")] +void mainHitMS(inout Payload pld) { + UseHitObject(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl new file mode 100644 index 0000000000..baa3a07a5b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl @@ -0,0 +1,4 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// expected-error@+1{{'dx::HitObject' is an object and cannot be used as a type parameter}} +RWStructuredBuffer InvalidBuffer; diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..59c8dfbe2f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unavailable-pre-sm69.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +// Check that the HitObject is unavailable pre SM 6.9. 
+ +[shader("raygeneration")] +void main() { + // expected-error@+3{{intrinsic dx::HitObject::MakeNop potentially used by ''main'' requires shader model 6.9 or greater}} + // expected-error@+2{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + // expected-error@+1{{potential misuse of built-in type 'dx::HitObject' in shader model lib_6_8; introduced in shader model 6.9}} + dx::HitObject hit = dx::HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl new file mode 100644 index 0000000000..4b6c45806b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-unsupported-vs.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 %s -verify + +// expected-note@+1{{entry function defined here}} +float main(RayDesc rayDesc: RAYDESC) : OUT { +// expected-error@+1{{dx::HitObject is unavailable in shader stage 'vertex' (requires 'raygeneration', 'closesthit' or 'miss')}} + dx::HitObject::MakeNop(); + return 0.f; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl new file mode 100644 index 0000000000..c266d81ddb --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-using-namespace.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +// This test checks that HitObject can be used with 'using namespace dx' instead of explicit namespace prefix +// expected-no-diagnostics + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +using namespace dx; + +[shader("raygeneration")] +void main() +{ + HitObject hit; + MaybeReorderThread(hit); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, 
in Attribs attrs) +{ + // Create a HitObject + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // Also test using a static method + HitObject hit = HitObject::MakeNop(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl new file mode 100644 index 0000000000..cb7a24e1c7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-without-namespace.hlsl @@ -0,0 +1,39 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{unknown type name 'HitObject'}} + HitObject hit; +} + +// Also test API methods +[shader("raygeneration")] +void main2() +{ + // expected-error@+1{{use of undeclared identifier 'HitObject'}} + HitObject::MakeNop(); +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl new file mode 100644 index 0000000000..edf7e4fa71 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/maybereorderthread-without-namespace.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct [raypayload] Payload +{ + float elem + : write(caller,anyhit,closesthit,miss) + : read(caller,anyhit,closesthit,miss); +}; + +struct Attribs { float2 barys; }; + +[shader("raygeneration")] +void main() +{ + // 
expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(1); +} + +[shader("closesthit")] +void closestHit(inout Payload pld, in Attribs attrs) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(2); +} + +[shader("miss")] +void missShader(inout Payload pld) +{ + // expected-error@+1{{use of undeclared identifier 'MaybeReorderThread'}} + MaybeReorderThread(3); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl new file mode 100644 index 0000000000..1625454360 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T hs_6_9 -verify %s + +struct HsConstantData { + float Edges[3] : SV_TessFactor; + vector vec; +}; + +struct LongVec { + float4 f; + vector vec; +}; + +HsConstantData PatchConstantFunction( // expected-error{{vectors of over 4 elements in patch constant function return type are not supported}} + vector vec : V, // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} + LongVec lv : L) { // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} + return (HsConstantData)0; +} + +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl new file mode 100644 index 0000000000..0604feeaec --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -0,0 +1,200 @@ +// RUN: %dxc -T ps_6_9 -DTYPE=LongVec -DNUM=5 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s +// RUN: %dxc -T ps_6_9 -DNUM=1024 -verify %s + +// Add tests for base types and 
instantiated template classes with longvecs +// Size of the vector shouldn't matter, but using a few different ones just in case. + +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#ifndef TYPE +#define TYPE LongVecTpl +#endif + +struct LongVec { + float4 f; + vector vec; +}; + +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; + +vector global_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + +cbuffer BadBuffy { + vector cb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +}; + +tbuffer BadTuffy { + vector tb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +}; + +ConstantBuffer< TYPE > const_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // 
expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} + +[shader("pixel")] +vector main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + vector vec : V) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + return vec; +} + +[shader("vertex")] +TYPE vs_main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + TYPE parm : P) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + parm.f = 0; + return parm; +} + + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line TYPE e, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout PointStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout LineStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout TriangleStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +void patch_const(InputPatch inpatch, // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +[shader("hull")] 
+[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +RaytracingAccelerationStructure RTAS; + +struct [raypayload] DXRLongVec { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRLongVecSub : DXRLongVec { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRLongVecTpl { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +#define RTTYPE PASTE(DXR,TYPE) + +[shader("raygeneration")] +void raygen() { + RTTYPE p = (RTTYPE)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + + +[shader("closesthit")] +void closesthit(inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("anyhit")] +void AnyHit( inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} +{ +} + +[shader("miss")] +void Miss(inout RTTYPE payload){ // 
expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + RTTYPE attr = (RTTYPE)0; + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("callable")] +void callable1(inout RTTYPE p) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +groupshared LongVec as_pld; + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + DispatchMesh(1,1,1,as_pld); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +struct NodeLongVec { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +struct NodeLongVecSub : NodeLongVec { + int3 is; +}; + +template +struct NodeLongVecTpl { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +#define NTYPE PASTE(Node,TYPE) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +void broadcast(DispatchNodeInputRecord input, // expected-error{{vectors of over 4 elements in node records are not supported}} + NodeOutput output) // expected-error{{vectors of over 4 elements in node records are not supported}} +{ + ThreadNodeOutputRecords touts; // expected-error{{vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{vectors of over 4 elements in node records are not supported}} +} + +[Shader("node")] 
+[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl new file mode 100644 index 0000000000..28b4a52158 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=float +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=bool +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=uint64_t +// RUN: %dxc -Tlib_6_9 -verify %s -DTYPE=double +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=float16_t +// RUN: %dxc -Tlib_6_9 -verify %s -enable-16bit-types -DTYPE=int16_t + +export +vector doit(vector vec5) { + vec5.x = 1; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return vec5.xyw; // expected-error {{invalid swizzle 'xyw' on vector of over 4 elements.}} +} + +export +TYPE arr_to_vec(TYPE arr[5]) { + + TYPE val = (vector(arr, 1)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + TYPE val2 = ((vector)arr).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + + return val; +} + +export TYPE lv_ctor(TYPE s) { + TYPE ret = (vector(1, 2, 3, 4, 5, s)).x; // expected-error {{invalid swizzle 'x' on vector of over 4 elements.}} + return ret; +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl new file mode 100644 index 0000000000..54c85191da --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T ps_6_8 -verify 
%s + +#define TYPE float +#define NUM 5 + +StructuredBuffer > sbuf; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +}; +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +export vector lv_param_passthru( // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector vec1) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + arr[1]= vec1; + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec1 = vec2; + vec2 = tmp; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl new file mode 100644 index 0000000000..c1da348695 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl @@ 
-0,0 +1,116 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=float -DNUM=1025 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=float -DNUM=1025 -verify %s + +// A test to verify that declarations of longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl2 { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared LongVecTpl gs_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl<1025>' requested here}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static LongVecTpl2 static_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl2<1025>' requested here}} + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_passthru(vector vec1) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec2 = vec1; +} + +export 
void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec1 = vec2; + vec2 = tmp; +} + +export void lv_global_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + static_vec = vec; +} + +export vector lv_global_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_gs_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + gs_vec = vec; +} + +export vector lv_gs_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +#define DIMS 10 + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_arr_passthru(vector vec)[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = vec; + return ret; +} + +export void lv_global_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + static_vec_arr[i] = vec[i]; +} + +export vector lv_global_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = static_vec_arr[i]; 
+ return ret; +} + +export void lv_gs_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + gs_vec_arr[i] = vec[i]; +} + +export vector lv_gs_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +export LongVec lv_param_rec_passthru(LongVec vec) { + LongVec ret = vec; + return ret; +} + +export vector lv_splat(TYPE scalar) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = scalar; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export vector lv_array_cast(TYPE arr[NUM]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = (vector)arr; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + diff --git a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl index 40e0452719..05ec268a0c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl @@ -60,12 +60,9 @@ void woo() { } template -// expected-note@+1{{zero sized record defined here}} struct ForwardDecl; // expected-note{{template is declared here}} void woot() { - // Forward decl fails because forcing completion to check empty size for node object. 
- // expected-error@+1{{record used in GroupNodeInputRecords may not have zero size}} GroupNodeInputRecords > data; // expected-error{{implicit instantiation of undefined template 'ForwardDecl'}} foo(data); } diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index 8869b80400..b0d4f1da7f 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -1,17 +1,83 @@ -// RUN: %dxc -Tlib_6_3 -Wno-unused-value -verify %s +// RUN: %dxc -Tlib_6_8 -Wno-unused-value -verify %s // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. -// expected-note@+4 {{forward declaration of 'S'}} -// expected-note@+3 {{forward declaration of 'S'}} -// expected-note@+2 {{forward declaration of 'S'}} -// expected-note@+1 {{forward declaration of 'S'}} -struct S; + +struct S; // expected-note 24 {{forward declaration of 'S'}} +template struct T; // expected-note 4 {{template is declared here}} + ConstantBuffer CB; // expected-error {{variable has incomplete type 'S'}} +ConstantBuffer > TB; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +S s; // expected-error {{variable has incomplete type 'S'}} +T<1> t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +cbuffer BadBuffy { + S cb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> cb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + +tbuffer BadTuffy { + S tb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> tb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + S func( // expected-error {{incomplete result type 'S' in function definition}} S param) // expected-error {{variable has incomplete type 'S'}} { S local; // expected-error {{variable has incomplete type 'S'}} return (S)0; // 
expected-error {{'S' is an incomplete type}} } + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line S e, // expected-error {{variable has incomplete type 'S'}} + inout PointStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout LineStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout TriangleStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +void patch_const(InputPatch inpatch, // expected-error{{variable has incomplete type 'S'}} + OutputPatch outpatch) {} // expected-error{{variable has incomplete type 'S'}} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +// expected-error@+1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{variable has incomplete type 'S'}} + NodeOutput output) // expected-error{{variable has incomplete type 'S'}} +{ + ThreadNodeOutputRecords touts; // expected-error{{variable has incomplete type 'S'}} + GroupNodeOutputRecords gouts; // expected-error{{variable has incomplete type 'S'}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // 
expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{variable has incomplete type 'S'}} diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl new file mode 100644 index 0000000000..55b4623725 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump-implicit.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump-implicit %s | FileCheck %s + +float main(RayDesc rayDesc : RAYDESC) : OUT { + return 0; +} + +// CHECK: VarDecl 0x{{.+}} <> implicit RAY_FLAG_FORCE_OMM_2_STATE 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1024 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + +// CHECK: VarDecl 0x{{.+}} <> implicit RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS 'const unsigned int' static cinit +// CHECK: IntegerLiteral 0x{{.+}} <> 'const unsigned int' 1 +// CHECK: AvailabilityAttr 0x{{.+}} <> Implicit 6.9 0 0 "" + diff --git a/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl new file mode 100644 index 0000000000..2ec79a060f --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-ast-dump.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -T vs_6_9 -E main -ast-dump %s | FileCheck %s + +RaytracingAccelerationStructure RTAS; + + +float main(RayDesc rayDesc : RAYDESC) : OUT { + RayQuery<0, RAYQUERY_FLAG_NONE> rayQuery1; + RayQuery rayQuery2; + rayQuery1.TraceRayInline(RTAS, 1, 2, rayDesc); + rayQuery2.TraceRayInline(RTAS, RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + return 0; +} + +// CHECK: -DeclStmt 0x{{.+}} +// CHECK-NEXT: `-VarDecl 0x{{.+}} used rayQuery1 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' 'void ()' +// CHECK-NEXT: -DeclStmt 0x{{.+}} +// CHECK-NEXT: 
`-VarDecl 0x{{.+}} used rayQuery2 'RayQuery':'RayQuery<1024, 1>' callinit +// CHECK-NEXT: `-CXXConstructExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' 'void ()' +// CHECK-NEXT: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' lvalue Var 0x{{.+}} 'rayQuery1' 'RayQuery<0, RAYQUERY_FLAG_NONE>':'RayQuery<0, 0>' + +// CHECK: -CXXMemberCallExpr 0x{{.+}} 'void' +// CHECK-NEXT: -MemberExpr 0x{{.+}} '' .TraceRayInline +// CHECK-NEXT: `-DeclRefExpr 0x{{.+}} 'RayQuery':'RayQuery<1024, 1>' lvalue Var 0x{{.+}} 'rayQuery2' 'RayQuery':'RayQuery<1024, 1>' diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl new file mode 100644 index 0000000000..722187cf43 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-DXR-entry-point.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_3 -validator-version 1.8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +RaytracingAccelerationStructure RTAS; +// DXR entry to test that restricted flags are diagnosed. 
+[shader("raygeneration")] +void main(void) { + RayDesc rayDesc; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_3; introduced in shader model 6.9}} + RayQuery rayQuery; + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model lib_6_3; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl new file mode 100644 index 0000000000..3e2031e0a7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-export-sm65.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -T lib_6_5 -verify %s + +// expect no diagnostics here, since global variables +// are not picked up through the recursive AST visitor's +// traversal of the exported function. 
+int x = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +export float4 MyExportedFunction(float4 color) { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_5; introduced in shader model 6.9}} + return color * RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl new file mode 100644 index 0000000000..476c1a503e --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65-warnings.hlsl @@ -0,0 +1,11 @@ +// RUN: %dxc -Wno-error-hlsl-rayquery-flags -Wno-error-hlsl-availability -T vs_6_5 -E main -verify %s + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + // expected-warning@+3{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0a; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl new file mode 100644 index 0000000000..6904f58c7d --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-diag-TU-sm65.hlsl @@ -0,0 +1,46 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// tests that RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS usage will emit +// one warning for each incompatible availability attribute decl, +// when the compilation target is less than shader model 6.9. 
+ +namespace MyNamespace { + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + static const int badVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +// expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} +groupshared const int otherBadVar = RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; + +int retNum(){ + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +int retNumUncalled(){ + // no diagnostic expected here + return RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS; +} + +RaytracingAccelerationStructure RTAS; +void main(uint i : IDX, RayDesc rayDesc : RAYDESC) { + + int x = MyNamespace::badVar + otherBadVar + retNum(); + RayQuery<0> rayQuery0a; + + if (x > 4){ + rayQuery0a.TraceRayInline(RTAS, 8, 2, rayDesc); + } + else{ + rayQuery0a.TraceRayInline(RTAS, 16, 2, rayDesc); + } + + // expected-error@+2{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0b; + + // expected-warning@+2{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + // expected-warning@+1{{potential misuse of built-in constant 'RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model vs_6_5; introduced in shader model 6.9}} + RayQuery rayQuery0d; + +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl new file mode 100644 index 0000000000..d31d9bf289 --- /dev/null +++ 
b/tools/clang/test/SemaHLSL/rayquery-omm-diag-sm65.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T vs_6_5 -E main -verify %s + +// Test that at the call site of any TraceRayInline call, a default error +// warning is emitted that indicates the ray query object has the +// RAY_FLAG_FORCE_OMM_2_STATE set, but doesn't have +// RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set + +RaytracingAccelerationStructure RTAS; +void main(RayDesc rayDesc : RAYDESC) : OUT { + // expected-note@+1 2 {{RayQueryFlags declared here}} + RayQuery<0> rayQuery; // implicitly, the second arg is 0. + + // expected-error@+2{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + // expected-warning@+1{{potential misuse of built-in constant 'RAY_FLAG_FORCE_OMM_2_STATE' in shader model vs_6_5; introduced in shader model 6.9}} + rayQuery.TraceRayInline(RTAS, RAY_FLAG_FORCE_OMM_2_STATE, 2, rayDesc); + + // expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} + rayQuery.TraceRayInline(RTAS, 1024, 2, rayDesc); + + // expected-error@+1{{A non-zero value for the RayQueryFlags template argument requires shader model 6.9 or above.}} + RayQuery<0, 1> rayQueryInvalid; +} diff --git a/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl new file mode 100644 index 0000000000..5e484d193e --- /dev/null +++ b/tools/clang/test/SemaHLSL/rayquery-omm-type-diag.hlsl @@ -0,0 +1,8 @@ +// RUN: %dxc -T vs_6_9 -verify %s +// RUN: %dxc -T vs_6_5 -verify %s + +// validate 2nd template argument flags +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef RayQuery BadRayQuery; +// expected-error@+1{{When using 'RAY_FLAG_FORCE_OMM_2_STATE' in RayFlags, RayQueryFlags must have RAYQUERY_FLAG_ALLOW_OPACITY_MICROMAPS set.}} +typedef 
RayQuery BadRayQuery2; diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl new file mode 100644 index 0000000000..272a46a87e --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-no-errors.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T ps_6_0 -verify %s + +// expected-no-diagnostics +// No diagnostic is expected because this is a non-library target, +// and SubObjects are ignored on non-library targets. + +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + +[shader("pixel")] +int main(int i : INDEX) : SV_Target { + return 1; +} diff --git a/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl new file mode 100644 index 0000000000..c220f5734d --- /dev/null +++ b/tools/clang/test/SemaHLSL/raytracingpipelineconfig1-warnings.hlsl @@ -0,0 +1,6 @@ +// RUN: %dxc -T lib_6_8 -verify %s + +// expected-warning@+1{{potential misuse of built-in constant 'RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS' in shader model lib_6_8; introduced in shader model 6.9}} +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS }; + + diff --git a/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl new file mode 100644 index 0000000000..0192154b78 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-globallycoherent-mismatch.hlsl @@ -0,0 +1,96 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonCBuf; +globallycoherent RWByteAddressBuffer GCBuf; +reordercoherent RWByteAddressBuffer RCBuf; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' implied by 'globallycoherent' in 'RCGCBuf'. 
'reordercoherent' ignored.}} +reordercoherent globallycoherent RWByteAddressBuffer RCGCBuf; + +globallycoherent RWByteAddressBuffer getPromoteRC() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGC() { + return GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer GCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +reordercoherent RWByteAddressBuffer RCBufMultiArr[2][2]; +globallycoherent RWByteAddressBuffer GCBufMultiArr[2][2]; + +globallycoherent RWByteAddressBuffer getPromoteRCArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCArr() { + return GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getPromoteRCMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getDemoteGCMultiArr() { + return GCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + 
+void RCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(globallycoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getPromoteToGCParam(inout globallycoherent RWByteAddressBuffer PGCBuf) { + PGCBuf = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer __restrict' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParam(inout reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' demotes globallycoherent to reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCDemoteBufArr[2] = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr0[2] = GCBufMultiArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' demotes globallycoherent to reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCDemoteBufMultiArr1[2][2] = GCBufMultiArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' demotes globallycoherent to reordercoherent annotation}} + +static globallycoherent RWByteAddressBuffer SRCPromoteBufArr[2] = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr0[2] = RCBufMultiArr[0]; // expected-warning{{implicit conversion from 
'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer [2]' promotes reordercoherent to globallycoherent annotation}} +static globallycoherent RWByteAddressBuffer SRCPromoteBufMultiArr1[2][2] = RCBufMultiArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2][2]' to 'globallycoherent RWByteAddressBuffer [2][2]' promotes reordercoherent to globallycoherent annotation}} + +void getPromoteToGCParamArr(inout globallycoherent RWByteAddressBuffer PGCBufArr[2]) { + PGCBufArr = RCBufArr; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer [2]' to 'globallycoherent RWByteAddressBuffer __restrict[2]' promotes reordercoherent to globallycoherent annotation}} +} +void getDemoteToRCParamArr(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = GCBufArr; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' demotes globallycoherent to reordercoherent annotation}} +} + +globallycoherent RWByteAddressBuffer getGCBuf() { + return GCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +[shader("raygeneration")] +void main() +{ + GCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + RCStore(GCBuf); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC = GCBuf; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC = RCBuf; // expected-warning{{implicit conversion 
from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGCReturn = getGCBuf(); // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} + + reordercoherent RWByteAddressBuffer RCCopyGC0 = GCBufArr[0]; // expected-warning{{implicit conversion from 'globallycoherent RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' demotes globallycoherent to reordercoherent annotation}} + globallycoherent RWByteAddressBuffer GCCopyRC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'globallycoherent RWByteAddressBuffer' promotes reordercoherent to globallycoherent annotation}} +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl new file mode 100644 index 0000000000..130b0efee7 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-implied.hlsl @@ -0,0 +1,41 @@ +// RUN: %dxc -E main -T lib_6_9 -verify %s +// REQUIRES: dxil-1-9 + +using Ty = RWTexture1D; + +using GTy = globallycoherent Ty; +using RTy = reordercoherent Ty; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGTy = globallycoherent GTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRTy = reordercoherent RTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy = globallycoherent RTy; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} 
+using RGTy = reordercoherent GTy; + +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy = globallycoherent GRTy; +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy = reordercoherent RGTy; + +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using GRTy2 = globallycoherent reordercoherent Ty; +// expected-warning@+1{{attribute 'globallycoherent' implies 'reordercoherent'}} +using RGTy2 = reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GGRTy2 = globallycoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'globallycoherent' is already applied}} +using GRGTy2 = globallycoherent reordercoherent globallycoherent Ty; + +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RGRTy2 = reordercoherent globallycoherent reordercoherent Ty; +// expected-warning@+2{{attribute 'globallycoherent' implies 'reordercoherent'}} +// expected-warning@+1{{attribute 'reordercoherent' is already applied}} +using RRGTy2 = reordercoherent reordercoherent globallycoherent Ty; diff --git a/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl new file mode 100644 index 0000000000..447e496c6e --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-mismatch.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +RWByteAddressBuffer NonRCBuf; +reordercoherent RWByteAddressBuffer RCBuf; + +RWByteAddressBuffer NonRCBufArr[2]; +reordercoherent RWByteAddressBuffer RCBufArr[2]; + +RWByteAddressBuffer NonRCBufMultiArr[2][2]; +reordercoherent RWByteAddressBuffer 
RCBufMultiArr[2][2]; + +RWByteAddressBuffer getNonRCBuf() { + return NonRCBuf; +} + +reordercoherent RWByteAddressBuffer getRCBuf() { + return RCBuf; +} + +RWByteAddressBuffer getNonRCBufArr() { + return NonRCBufArr[0]; +} + +reordercoherent RWByteAddressBuffer getRCBufArr() { + return RCBufArr[0]; +} + +RWByteAddressBuffer getNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; +} + +reordercoherent RWByteAddressBuffer getRCBufMultiArr() { + return RCBufMultiArr[0][0]; +} + +RWByteAddressBuffer getNonGCRCBuf() { + return RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBuf() { + return NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufArr() { + return RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufArr() { + return NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +RWByteAddressBuffer getNonGCRCBufMultiArr() { + return RCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} +} + +reordercoherent RWByteAddressBuffer getGCNonRCBufMultiArr() { + return NonRCBufMultiArr[0][0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} +} + +void NonGCStore(RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void GCStore(reordercoherent RWByteAddressBuffer Buf) { + Buf.Store(0, 0); +} + +void getNonRCBufPAram(inout 
reordercoherent RWByteAddressBuffer PRCBuf) { + PRCBuf = NonRCBuf; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer __restrict' adds reordercoherent annotation}} +} + +static reordercoherent RWByteAddressBuffer SRCBufArr[2] = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr0[2] = NonRCBufMultiArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer [2]' adds reordercoherent annotation}} +static reordercoherent RWByteAddressBuffer SRCBufMultiArr1[2][2] = NonRCBufMultiArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2][2]' to 'reordercoherent RWByteAddressBuffer [2][2]' adds reordercoherent annotation}} + +void getNonRCBufArrParam(inout reordercoherent RWByteAddressBuffer PRCBufArr[2]) { + PRCBufArr = NonRCBufArr; // expected-warning{{implicit conversion from 'RWByteAddressBuffer [2]' to 'reordercoherent RWByteAddressBuffer __restrict[2]' adds reordercoherent annotation}} +} + +[shader("raygeneration")] void main() { + NonGCStore(NonRCBuf); // No diagnostic + GCStore(NonRCBuf); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + NonGCStore(RCBuf); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + GCStore(RCBuf); // No diagnostic + + RWByteAddressBuffer NonGCCopyNonGC = NonRCBuf; // No diagnostic + RWByteAddressBuffer NonGCCopyGC = RCBuf; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC = NonRCBuf; // 
expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC = RCBuf; // No diagnostic + + reordercoherent RWByteAddressBuffer GCCopyNonGCReturn = getNonRCBuf(); // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyGCReturn = getRCBuf(); // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + RWByteAddressBuffer NonGCCopyNonGC0 = NonRCBufArr[0]; // No diagnostic + RWByteAddressBuffer NonGCCopyGC0 = RCBufArr[0]; // expected-warning{{implicit conversion from 'reordercoherent RWByteAddressBuffer' to 'RWByteAddressBuffer' loses reordercoherent annotation}} + + reordercoherent RWByteAddressBuffer GCCopyNonGC0 = NonRCBufArr[0]; // expected-warning{{implicit conversion from 'RWByteAddressBuffer' to 'reordercoherent RWByteAddressBuffer' adds reordercoherent annotation}} + reordercoherent RWByteAddressBuffer GCCopyGC0 = RCBufArr[0]; // No diagnostic +} diff --git a/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl new file mode 100644 index 0000000000..57fd33fb13 --- /dev/null +++ b/tools/clang/test/SemaHLSL/reordercoherent-type-errors.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +reordercoherent RWTexture1D uav1 : register(u3); + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'Buffer >'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent Buffer srv; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float m; + 
+reordercoherent RWTexture2D tex[12]; +reordercoherent RWTexture2D texMD[12][12]; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'float'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +reordercoherent float One() { + return 1.0; +} + +struct Record { uint index; }; + +// expected-error@+2 {{'reordercoherent' is not a valid modifier for a declaration of type 'RWDispatchNodeInputRecord'}} +// expected-note@+1 {{'reordercoherent' can only be applied to UAV objects}} +void func2(reordercoherent RWDispatchNodeInputRecord funcInputData) {} diff --git a/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl new file mode 100644 index 0000000000..6133847fb8 --- /dev/null +++ b/tools/clang/test/SemaHLSL/subobjects-ast-dump.hlsl @@ -0,0 +1,136 @@ +// RUN: %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck -check-prefix=ASTIMPL %s +// RUN: %dxc -T lib_6_9 -ast-dump %s | FileCheck -check-prefix=AST %s +// The HLSL source is just a copy of +// tools\clang\test\HLSLFileCheck\shader_targets\raytracing\subobjects_raytracingPipelineConfig1.hlsl + +// This test tests that the HLSLSubObjectAttr attribute is present on all +// HLSL subobjects, and tests the ast representation of subobjects + +// ASTIMPL: CXXRecordDecl 0x{{.+}} <> implicit referenced struct StateObjectConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 0 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct GlobalRootSignature definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 1 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct LocalRootSignature definition +// ASTIMPL-NEXT: 
HLSLSubObjectAttr 0x{{.+}} <> Implicit 2 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Data 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct SubobjectToExportsAssociation definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 8 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Subobject 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Exports 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingShaderConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 9 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxPayloadSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxAttributeSizeInBytes 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit struct RaytracingPipelineConfig definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 10 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct TriangleHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 0 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct ProceduralPrimitiveHitGroup definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 11 1 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit AnyHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit ClosestHit 'string' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Intersection 'string' 
+// ASTIMPL-NEXT: CXXRecordDecl 0x{{.+}} <> implicit referenced struct RaytracingPipelineConfig1 definition +// ASTIMPL-NEXT: HLSLSubObjectAttr 0x{{.+}} <> Implicit 12 2 +// ASTIMPL-NEXT: FinalAttr 0x{{.+}} <> Implicit final +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit MaxTraceRecursionDepth 'unsigned int' +// ASTIMPL-NEXT: FieldDecl 0x{{.+}} <> implicit Flags 'unsigned int' + +// AST: VarDecl 0x{{.+}} grs 'GlobalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'GlobalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "CBV(b0)" +// AST-NEXT: VarDecl 0x{{.+}} soc 'StateObjectConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'StateObjectConfig' +// AST-NEXT: BinaryOperator 0x{{.+}} 'unsigned int' '|' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS' 'const unsigned int' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} lrs 'LocalRootSignature' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'LocalRootSignature' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)" +// AST-NEXT: VarDecl 0x{{.+}} sea 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a;b;foo;c" +// AST-NEXT: VarDecl 0x{{.+}} sea2 
'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} sea3 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "grs" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "" +// AST-NEXT: VarDecl 0x{{.+}} rsc 'RaytracingShaderConfig' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingShaderConfig' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 128 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 64 +// AST-NEXT: VarDecl 0x{{.+}} rpc 'RaytracingPipelineConfig1' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} sea4 'SubobjectToExportsAssociation' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'SubobjectToExportsAssociation' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "rpc" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue ";" +// AST-NEXT: VarDecl 0x{{.+}} rpc2 'RaytracingPipelineConfig1' 
static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'RaytracingPipelineConfig1' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: IntegerLiteral 0x{{.+}} 'literal int' 32 +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'unsigned int' +// AST-NEXT: DeclRefExpr 0x{{.+}} 'const unsigned int' lvalue Var 0x{{.+}} 'RAYTRACING_PIPELINE_FLAG_NONE' 'const unsigned int' +// AST-NEXT: VarDecl 0x{{.+}} trHitGt 'TriangleHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'TriangleHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: VarDecl 0x{{.+}} ppHitGt 'ProceduralPrimitiveHitGroup' static cinit +// AST-NEXT: InitListExpr 0x{{.+}} 'ProceduralPrimitiveHitGroup' +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "a" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "b" +// AST-NEXT: ImplicitCastExpr 0x{{.+}} 'const string' +// AST-NEXT: StringLiteral 0x{{.+}} 'literal string' lvalue "c" + +GlobalRootSignature grs = {"CBV(b0)"}; +StateObjectConfig soc = { STATE_OBJECT_FLAGS_ALLOW_LOCAL_DEPENDENCIES_ON_EXTERNAL_DEFINITONS | STATE_OBJECT_FLAG_ALLOW_STATE_OBJECT_ADDITIONS }; +LocalRootSignature lrs = {"UAV(u0, visibility = SHADER_VISIBILITY_GEOMETRY), RootFlags(LOCAL_ROOT_SIGNATURE)"}; +SubobjectToExportsAssociation sea = { "grs", "a;b;foo;c" }; +// Empty association is well-defined: it creates a default association +SubobjectToExportsAssociation sea2 = { "grs", ";" }; +SubobjectToExportsAssociation sea3 = { "grs", "" }; +RaytracingShaderConfig rsc = { 128, 64 }; +RaytracingPipelineConfig1 rpc = { 32, RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES }; +SubobjectToExportsAssociation sea4 = {"rpc", ";"}; +RaytracingPipelineConfig1 rpc2 = 
{32, RAYTRACING_PIPELINE_FLAG_NONE }; +TriangleHitGroup trHitGt = {"a", "b"}; +ProceduralPrimitiveHitGroup ppHitGt = { "a", "b", "c"}; diff --git a/tools/clang/test/lit.cfg b/tools/clang/test/lit.cfg index 5fc5d4a27c..a3a352071c 100644 --- a/tools/clang/test/lit.cfg +++ b/tools/clang/test/lit.cfg @@ -504,6 +504,9 @@ if config.enable_backtrace == "1": if config.spirv: config.available_features.add("spirv") +if config.metal: + config.available_features.add("metal") + # Check supported dxil version def get_dxil_version(): result = subprocess.run([lit.util.which('dxc', llvm_tools_dir), "--version"], stdout=subprocess.PIPE) diff --git a/tools/clang/test/lit.site.cfg.in b/tools/clang/test/lit.site.cfg.in index 207450add5..80dcadf288 100644 --- a/tools/clang/test/lit.site.cfg.in +++ b/tools/clang/test/lit.site.cfg.in @@ -22,6 +22,7 @@ config.enable_backtrace = "@ENABLE_BACKTRACES@" config.host_arch = "@HOST_ARCH@" config.spirv = "@ENABLE_SPIRV_CODEGEN@" =="ON" config.hlsl_headers_dir = "@HLSL_HEADERS_DIR@" # HLSL change +config.metal = "@ENABLE_METAL_CODEGEN@".upper() == "ON" # HLSL change # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. 
diff --git a/tools/clang/tools/dxcompiler/CMakeLists.txt b/tools/clang/tools/dxcompiler/CMakeLists.txt index 004d2e5ad1..c69e276194 100644 --- a/tools/clang/tools/dxcompiler/CMakeLists.txt +++ b/tools/clang/tools/dxcompiler/CMakeLists.txt @@ -136,6 +136,14 @@ target_link_libraries(dxcompiler PRIVATE ${LIBRARIES}) if (ENABLE_SPIRV_CODEGEN) target_link_libraries(dxcompiler PRIVATE clangSPIRV) endif (ENABLE_SPIRV_CODEGEN) +if (ENABLE_METAL_CODEGEN) + target_link_libraries(dxcompiler PRIVATE ${METAL_IRCONVERTER_LIB}) + target_include_directories(dxcompiler PRIVATE ${METAL_IRCONVERTER_INCLUDE_DIR}) + + get_filename_component(METAL_IRCONVERTER_LIB_DIR ${METAL_IRCONVERTER_LIB} DIRECTORY CACHE) + set_property(TARGET dxcompiler APPEND_STRING + PROPERTY LINK_FLAGS " -Wl,-rpath,${METAL_IRCONVERTER_LIB_DIR}") +endif (ENABLE_METAL_CODEGEN) include_directories(AFTER ${LLVM_INCLUDE_DIR}/dxc/Tracing ${DIASDK_INCLUDE_DIRS} ${HLSL_VERSION_LOCATION}) include_directories(${LLVM_SOURCE_DIR}/tools/clang/tools/dxcvalidator) diff --git a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp index 01f4973fbe..16d8b1dadd 100644 --- a/tools/clang/tools/dxcompiler/dxcdisassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcdisassembler.cpp @@ -671,6 +671,8 @@ static const char *FlagToString(DXIL::RaytracingPipelineFlags Flag) { return "RAYTRACING_PIPELINE_FLAG_SKIP_TRIANGLES"; case DXIL::RaytracingPipelineFlags::SkipProceduralPrimitives: return "RAYTRACING_PIPELINE_FLAG_SKIP_PROCEDURAL_PRIMITIVES"; + case DXIL::RaytracingPipelineFlags::AllowOpacityMicromaps: + return "RAYTRACING_PIPELINE_FLAG_ALLOW_OPACITY_MICROMAPS"; } return ""; } @@ -1218,6 +1220,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, bool bUAV = RP.isUAV(); LPCSTR RW = bUAV ? (RP.Basic.IsROV ? "ROV" : "RW") : ""; LPCSTR GC = bUAV && RP.Basic.IsGloballyCoherent ? "globallycoherent " : ""; + LPCSTR RC = bUAV && RP.Basic.IsReorderCoherent ? 
"reordercoherent " : ""; LPCSTR COUNTER = bUAV && RP.Basic.SamplerCmpOrHasCounter ? ", counter" : ""; switch (RP.getResourceKind()) { @@ -1231,7 +1234,7 @@ void PrintResourceProperties(DxilResourceProperties &RP, case DXIL::ResourceKind::TypedBuffer: case DXIL::ResourceKind::Texture2DMS: case DXIL::ResourceKind::Texture2DMSArray: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << "<"; if (RP.Typed.CompCount > 1) OS << std::to_string(RP.Typed.CompCount) << "x"; @@ -1239,11 +1242,11 @@ void PrintResourceProperties(DxilResourceProperties &RP, break; case DXIL::ResourceKind::RawBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); break; case DXIL::ResourceKind::StructuredBuffer: - OS << GC << RW << ResourceKindToString(RP.getResourceKind()); + OS << GC << RC << RW << ResourceKindToString(RP.getResourceKind()); OS << ""; break; diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index c1c844d4be..ebeee380ef 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -71,6 +71,10 @@ #include "clang/Basic/Version.h" #endif // SUPPORT_QUERY_GIT_COMMIT_INFO +#ifdef ENABLE_METAL_CODEGEN +#include "metal_irconverter.h" +#endif + #define CP_UTF16 1200 using namespace llvm; @@ -718,6 +722,7 @@ class DxcCompiler : public IDxcCompiler3, bool validateRootSigContainer = false; if (isPreprocessing) { + TimeTraceScope TimeScope("PreprocessAction", StringRef("")); // These settings are back-compatible with fxc. 
clang::PreprocessorOutputOptions &PPOutOpts = compiler.getPreprocessorOutputOpts(); @@ -817,6 +822,10 @@ class DxcCompiler : public IDxcCompiler3, } compiler.getLangOpts().IsHLSLLibrary = opts.IsLibraryProfile(); + if (compiler.getLangOpts().IsHLSLLibrary && opts.GenMetal) + return ErrorWithString("Shader libraries unsupported in Metal (yet)", + riid, ppResult); + // Clear entry function if library target if (compiler.getLangOpts().IsHLSLLibrary) compiler.getLangOpts().HLSLEntryFunction = @@ -859,6 +868,7 @@ class DxcCompiler : public IDxcCompiler3, compiler.getTarget().adjust(compiler.getLangOpts()); if (opts.AstDump) { + TimeTraceScope TimeScope("DumpAST", StringRef("")); clang::ASTDumpAction dumpAction; // Consider - ASTDumpFilter, ASTDumpLookups compiler.getFrontendOpts().ASTDumpDecls = true; @@ -868,6 +878,7 @@ class DxcCompiler : public IDxcCompiler3, dumpAction.EndSourceFile(); outStream.flush(); } else if (opts.DumpDependencies) { + TimeTraceScope TimeScope("DumpDependencies", StringRef("")); auto dependencyCollector = std::make_shared(); compiler.addDependencyCollector(dependencyCollector); compiler.createPreprocessor(clang::TranslationUnitKind::TU_Complete); @@ -970,6 +981,7 @@ class DxcCompiler : public IDxcCompiler3, EmitBCAction action(&llvmContext); FrontendInputFile file(pUtf8SourceName, IK_HLSL); bool compileOK; + TimeTraceScope TimeScope("Compile Action", StringRef("")); if (action.BeginSourceFile(compiler, file)) { action.Execute(); action.EndSourceFile(); @@ -1024,6 +1036,7 @@ class DxcCompiler : public IDxcCompiler3, // Do not create a container when there is only a a high-level // representation in the module. 
if (compileOK && !opts.CodeGenHighLevel) { + TimeTraceScope TimeScope("AssembleAndWriteContainer", StringRef("")); HRESULT valHR = S_OK; CComPtr pRootSigStream; IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), @@ -1107,7 +1120,86 @@ class DxcCompiler : public IDxcCompiler3, &pHashBlob)); IFT(pResult->SetOutputObject(DXC_OUT_SHADER_HASH, pHashBlob)); } // SUCCEEDED(valHR) - } // compileOK && !opts.CodeGenHighLevel +#ifdef ENABLE_METAL_CODEGEN + // This is a bit hacky because we don't currently have a good way to + // disassemble AIR. + if (opts.GenMetal && produceFullContainer && + !opts.OutputObject.empty()) { + IRCompiler *MetalCompiler = IRCompilerCreate(); + IRCompilerSetEntryPointName( + MetalCompiler, + compiler.getCodeGenOpts().HLSLEntryFunction.c_str()); + + IRObject *DXILObj = IRObjectCreateFromDXIL( + static_cast(pOutputBlob->GetBufferPointer()), + pOutputBlob->GetBufferSize(), IRBytecodeOwnershipNone); + + // Compile DXIL to Metal IR: + IRError *Error = nullptr; + IRObject *AIR = IRCompilerAllocCompileAndLink(MetalCompiler, NULL, + DXILObj, &Error); + + if (!AIR) { + IRObjectDestroy(DXILObj); + IRCompilerDestroy(MetalCompiler); + IRErrorDestroy(Error); + return ErrorWithString( + "Error occurred in Metal Shader Conversion", riid, ppResult); + } + + IRMetalLibBinary *MetalLib = IRMetalLibBinaryCreate(); + IRShaderStage Stage = IRShaderStageInvalid; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile); + switch (SM->GetKind()) { + case DXIL::ShaderKind::Vertex: + Stage = IRShaderStageVertex; + break; + case DXIL::ShaderKind::Pixel: + Stage = IRShaderStageFragment; + break; + case DXIL::ShaderKind::Hull: + Stage = IRShaderStageHull; + break; + case DXIL::ShaderKind::Domain: + Stage = IRShaderStageDomain; + break; + case DXIL::ShaderKind::Mesh: + Stage = IRShaderStageMesh; + break; + case DXIL::ShaderKind::Amplification: + Stage = IRShaderStageAmplification; + break; + case DXIL::ShaderKind::Geometry: + Stage = 
IRShaderStageGeometry; + break; + case DXIL::ShaderKind::Compute: + Stage = IRShaderStageCompute; + break; + } + assert(Stage != IRShaderStageInvalid && + "Library targets not supported for Metal (yet)."); + IRObjectGetMetalLibBinary(AIR, Stage, MetalLib); + size_t MetalLibSize = IRMetalLibGetBytecodeSize(MetalLib); + std::unique_ptr MetalLibBytes = + std::unique_ptr(new uint8_t[MetalLibSize]); + IRMetalLibGetBytecode(MetalLib, MetalLibBytes.get()); + + // Store the metallib to custom format or disk, or use to create a + // MTLLibrary. + + CComPtr MetalBlob; + IFT(hlsl::DxcCreateBlobOnHeapCopy( + MetalLibBytes.get(), (uint32_t)MetalLibSize, &MetalBlob)); + std::swap(pOutputBlob, MetalBlob); + + IRMetalLibBinaryDestroy(MetalLib); + IRObjectDestroy(DXILObj); + IRObjectDestroy(AIR); + IRCompilerDestroy(MetalCompiler); + } +#endif + } // compileOK && !opts.CodeGenHighLevel } std::string remarks; @@ -1440,6 +1532,13 @@ class DxcCompiler : public IDxcCompiler3, Opts.EnablePayloadQualifiers; compiler.getLangOpts().HLSLProfile = compiler.getCodeGenOpts().HLSLProfile = Opts.TargetProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile.c_str()); + if (SM->IsSM69Plus()) + compiler.getLangOpts().MaxHLSLVectorLength = DXIL::kSM69MaxVectorLength; + else + compiler.getLangOpts().MaxHLSLVectorLength = + DXIL::kDefaultMaxVectorLength; // Enable dumping implicit top level decls either if it was specifically // requested or if we are not dumping the ast from the command line. That diff --git a/tools/clang/tools/libclang/CMakeLists.txt b/tools/clang/tools/libclang/CMakeLists.txt index 1ef0c8ecd9..ed49cbaf44 100644 --- a/tools/clang/tools/libclang/CMakeLists.txt +++ b/tools/clang/tools/libclang/CMakeLists.txt @@ -119,6 +119,7 @@ if(MSVC) # Each functions is exported as "dllexport" in include/clang-c. 
# KB835326 set(LLVM_EXPORTED_SYMBOL_FILE) + add_compile_options(/bigobj) endif() # HLSL Change Starts diff --git a/tools/clang/unittests/HLSL/DxilContainerTest.cpp b/tools/clang/unittests/HLSL/DxilContainerTest.cpp index a1533ae19f..339b33c655 100644 --- a/tools/clang/unittests/HLSL/DxilContainerTest.cpp +++ b/tools/clang/unittests/HLSL/DxilContainerTest.cpp @@ -1454,6 +1454,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { "ConsumeStructuredBuffer consume_buf;" "RasterizerOrderedByteAddressBuffer rov_buf;" "globallycoherent RWByteAddressBuffer gc_buf;" + "reordercoherent RWByteAddressBuffer rc_buf;" "float function_import(float x);" "export float function0(min16float x) { " " return x + 1 + tex[0].x; }" @@ -1465,6 +1466,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { " f.f2 += 0.5; append_buf.Append(f);" " rov_buf.Store(i, f.i2.x);" " gc_buf.Store(i, f.i2.y);" + " rc_buf.Store(i, f.i2.y);" " b_buf.Store(i, f.i2.x + f.i2.y); }"; CComPtr pCompiler; CComPtr pSource; @@ -1477,7 +1479,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::DXIL::ResourceKind kind; hlsl::RDAT::DxilResourceFlag flag; }; - const unsigned numResFlagCheck = 5; + const unsigned numResFlagCheck = 6; CheckResFlagInfo resFlags[numResFlagCheck] = { {"b_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::None}, @@ -1487,6 +1489,8 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { hlsl::RDAT::DxilResourceFlag::UAVCounter}, {"gc_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVGloballyCoherent}, + {"rc_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVReorderCoherent}, {"rov_buf", hlsl::DXIL::ResourceKind::RawBuffer, hlsl::RDAT::DxilResourceFlag::UAVRasterizerOrderedView}}; @@ -1575,7 +1579,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { IFTBOOLMSG(false, E_FAIL, "unknown function name"); } } - VERIFY_ARE_EQUAL(resTable.Count(), 8U); + 
VERIFY_ARE_EQUAL(resTable.Count(), 9U); } } IFTBOOLMSG(blobFound, E_FAIL, "failed to find RDAT blob after compiling"); diff --git a/tools/clang/unittests/HLSL/ExtensionTest.cpp b/tools/clang/unittests/HLSL/ExtensionTest.cpp index 51dda5533c..65407291ca 100644 --- a/tools/clang/unittests/HLSL/ExtensionTest.cpp +++ b/tools/clang/unittests/HLSL/ExtensionTest.cpp @@ -204,79 +204,86 @@ Intrinsic Intrinsics[] = { {L"test_fn", DEFAULT_NAME, "r", - {1, false, true, false, -1, countof(TestFnArgs), TestFnArgs}}, + {1, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnArgs), TestFnArgs}}, {L"test_proc", DEFAULT_NAME, "r", - {2, false, false, false, -1, countof(TestProcArgs), TestProcArgs}}, + {2, 0, 0, -1, countof(TestProcArgs), TestProcArgs}}, {L"test_poly", "test_poly.$o", "r", - {3, false, true, false, -1, countof(TestFnCustomArgs), TestFnCustomArgs}}, + {3, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnCustomArgs), + TestFnCustomArgs}}, {L"test_int", "test_int", "r", - {4, false, true, false, -1, countof(TestFnIntArgs), TestFnIntArgs}}, + {4, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnIntArgs), TestFnIntArgs}}, {L"test_nolower", "test_nolower.$o", "n", - {5, false, true, false, -1, countof(TestFnNoLowerArgs), + {5, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnNoLowerArgs), TestFnNoLowerArgs}}, {L"test_pack_0", "test_pack_0.$o", "p", - {6, false, false, false, -1, countof(TestFnPack0), TestFnPack0}}, + {6, 0, 0, -1, countof(TestFnPack0), TestFnPack0}}, {L"test_pack_1", "test_pack_1.$o", "p", - {7, false, true, false, -1, countof(TestFnPack1), TestFnPack1}}, + {7, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack1), TestFnPack1}}, {L"test_pack_2", "test_pack_2.$o", "p", - {8, false, true, false, -1, countof(TestFnPack2), TestFnPack2}}, + {8, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack2), TestFnPack2}}, {L"test_pack_3", "test_pack_3.$o", "p", - {9, false, true, false, -1, countof(TestFnPack3), TestFnPack3}}, + {9, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestFnPack3), 
TestFnPack3}}, {L"test_pack_4", "test_pack_4.$o", "p", - {10, false, false, false, -1, countof(TestFnPack4), TestFnPack4}}, + {10, 0, 0, -1, countof(TestFnPack4), TestFnPack4}}, {L"test_rand", "test_rand", "r", - {11, false, false, false, -1, countof(TestRand), TestRand}}, + {11, 0, 0, -1, countof(TestRand), TestRand}}, {L"test_isinf", "test_isinf", "d", - {13, true, true, false, -1, countof(TestIsInf), TestIsInf}}, + {13, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIsInf), TestIsInf}}, {L"test_ibfe", "test_ibfe", "d", - {14, true, true, false, -1, countof(TestIBFE), TestIBFE}}, + {14, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, -1, + countof(TestIBFE), TestIBFE}}, // Make this intrinsic have the same opcode as an hlsl intrinsic with an // unsigned counterpart for testing purposes. {L"test_unsigned", "test_unsigned", "n", - {static_cast(hlsl::IntrinsicOp::IOP_min), false, true, false, -1, - countof(TestUnsigned), TestUnsigned}}, + {static_cast(hlsl::IntrinsicOp::IOP_min), INTRIN_FLAG_READ_NONE, + 0, -1, countof(TestUnsigned), TestUnsigned}}, {L"wave_proc", DEFAULT_NAME, "r", - {16, false, true, true, -1, countof(WaveProcArgs), WaveProcArgs}}, + {16, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(WaveProcArgs), WaveProcArgs}}, {L"test_o_1", "test_o_1.$o:1", "r", - {18, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {18, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_2", "test_o_2.$o:2", "r", - {19, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {19, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, {L"test_o_3", "test_o_3.$o:3", "r", - {20, false, true, true, -1, countof(TestOverloadArgs), TestOverloadArgs}}, + {20, INTRIN_FLAG_READ_NONE | INTRIN_FLAG_IS_WAVE, 0, -1, + countof(TestOverloadArgs), TestOverloadArgs}}, // custom lowering with both optional 
arguments and vector exploding. // Arg 0 = Opcode // Arg 1 = Pass as is @@ -286,16 +293,17 @@ Intrinsic Intrinsics[] = { {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOp), TestCustomLoadOp}}, + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOp), + TestCustomLoadOp}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpBool), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpBool), TestCustomLoadOpBool}}, {L"CustomLoadOp", "CustomLoadOp", "c:{\"default\" : \"0,1,2:?i1,3.0:?i32,3.1:?i32\"}", - {21, true, false, false, -1, countof(TestCustomLoadOpSubscript), + {21, INTRIN_FLAG_READ_ONLY, 0, -1, countof(TestCustomLoadOpSubscript), TestCustomLoadOpSubscript}}, }; @@ -303,7 +311,8 @@ Intrinsic BufferIntrinsics[] = { {L"MyBufferOp", "MyBufferOp", "m", - {12, false, true, false, -1, countof(TestMyBufferOp), TestMyBufferOp}}, + {12, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyBufferOp), + TestMyBufferOp}}, }; // Test adding a method to an object that normally has no methods (SamplerState @@ -312,7 +321,8 @@ Intrinsic SamplerIntrinsics[] = { {L"MySamplerOp", "MySamplerOp", "m", - {15, false, true, false, -1, countof(TestMySamplerOp), TestMySamplerOp}}, + {15, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMySamplerOp), + TestMySamplerOp}}, }; // Define a lowering string to target a common dxil extension operation defined @@ -345,12 +355,12 @@ Intrinsic Texture1DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_0), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_0), TestMyTexture1DOp_0}}, {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture1DOp_1), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture1DOp_1), 
TestMyTexture1DOp_1}}, }; @@ -358,7 +368,7 @@ Intrinsic Texture2DIntrinsics[] = { {L"MyTextureOp", "MyTextureOp", MyTextureOp_LoweringInfo, - {17, false, true, false, -1, countof(TestMyTexture2DOp), + {17, INTRIN_FLAG_READ_NONE, 0, -1, countof(TestMyTexture2DOp), TestMyTexture2DOp}}, }; @@ -1497,8 +1507,8 @@ TEST_F(ExtensionTest, EvalAttributeCollision) { Intrinsic Intrinsic = {L"collide_proc", "collide_proc", "r", - {static_cast(op), true, false, false, -1, - countof(Args), Args}}; + {static_cast(op), INTRIN_FLAG_READ_ONLY, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1532,10 +1542,8 @@ TEST_F(ExtensionTest, NoUnwind) { IA_C}, {"value", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, 1, IA_C}}; - Intrinsic Intrinsic = {L"test_proc", - "test_proc", - "r", - {1, false, false, false, -1, countof(Args), Args}}; + Intrinsic Intrinsic = { + L"test_proc", "test_proc", "r", {1, 0, 0, -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( @@ -1572,7 +1580,8 @@ TEST_F(ExtensionTest, DCE) { Intrinsic Intrinsic = {L"test_proc", "test_proc", "r", - {1, true, true, false, -1, countof(Args), Args}}; + {1, INTRIN_FLAG_READ_ONLY | INTRIN_FLAG_READ_NONE, 0, + -1, countof(Args), Args}}; Compiler c(m_dllSupport); c.RegisterIntrinsicTable(new TestIntrinsicTable(&Intrinsic, 1)); c.Compile(R"( diff --git a/tools/clang/unittests/HLSL/LinkerTest.cpp b/tools/clang/unittests/HLSL/LinkerTest.cpp index 7cafa0db06..df8bb644e1 100644 --- a/tools/clang/unittests/HLSL/LinkerTest.cpp +++ b/tools/clang/unittests/HLSL/LinkerTest.cpp @@ -526,6 +526,11 @@ TEST_F(LinkerTest, RunLinkMatArrayParam) { Link(L"main", L"ps_6_0", pLinker, {libName, libName2}, {"alloca [24 x float]", "getelementptr [12 x float], [12 x float]*"}, {}); + + Link(L"main", L"ps_6_9", pLinker, {libName, libName2}, + {"alloca [2 x <12 x float>]", + 
"getelementptr [12 x float], [12 x float]*"}, + {}); } TEST_F(LinkerTest, RunLinkMatParam) { diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index bb81c1c953..e337d2951c 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -146,12 +146,17 @@ class PixTest : public ::testing::Test { TEST_METHOD(RootSignatureUpgrade_Annotation) TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest) + TEST_METHOD(DxilPIXDXRInvocationsLog_EmbeddedRootSigs) TEST_METHOD(DebugInstrumentation_TextOutput) TEST_METHOD(DebugInstrumentation_BlockReport) TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + TEST_METHOD(NonUniformResourceIndex_Resource) + TEST_METHOD(NonUniformResourceIndex_DescriptorHeap) + TEST_METHOD(NonUniformResourceIndex_Raytracing) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -443,6 +448,11 @@ class PixTest : public ::testing::Test { std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob); CComPtr RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob); CComPtr RunDxilPIXDXRInvocationsLog(IDxcBlob *blob); + std::vector + RunDxilNonUniformResourceIndexInstrumentation(IDxcBlob *blob, + std::string &outputText); + void TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult); void TestPixUAVCase(char const *hlsl, wchar_t const *model, wchar_t const *entry); std::string Disassemble(IDxcBlob *pProgram); @@ -660,7 +670,7 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { CComPtr pOptimizedModule; CComPtr pText; VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( - dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + blob, Options.data(), Options.size(), &pOptimizedModule, &pText)); std::string outputText; if (pText->GetBufferSize() != 0) { @@ -670,6 +680,29 @@ CComPtr PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob *blob) { return pOptimizedModule; } +std::vector 
PixTest::RunDxilNonUniformResourceIndexInstrumentation( + IDxcBlob *blob, std::string &outputText) { + + CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::array Options = { + L"-opt-mod-passes", L"-dxil-dbg-value-to-dbg-declare", + L"-dxil-annotate-with-virtual-regs", + L"-hlsl-dxil-non-uniform-resource-index-instrumentation"}; + + CComPtr pOptimizedModule; + CComPtr pText; + VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + + outputText = BlobToUtf8(pText); + + const std::string disassembly = Disassemble(pOptimizedModule); + return Tokenize(disassembly, "\n"); +} + std::string PixTest::RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob) { CComPtr dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob); @@ -2945,6 +2978,230 @@ void MyMiss(inout MyPayload payload) RunDxilPIXDXRInvocationsLog(compiledLib); } +TEST_F(PixTest, DxilPIXDXRInvocationsLog_EmbeddedRootSigs) { + + const char *source = R"x( + +GlobalRootSignature grs = {"CBV(b0)"}; +struct MyPayload +{ + float4 color; +}; + +[shader("raygeneration")] +void MyRayGen() +{ +} + +[shader("closesthit")] +void MyClosestHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("anyhit")] +void MyAnyHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr) +{ +} + +[shader("miss")] +void MyMiss(inout MyPayload payload) +{ +} + +)x"; + + auto compiledLib = Compile(m_dllSupport, source, L"lib_6_3", + {L"-Qstrip_reflect"}, L"RootSig"); + RunDxilPIXDXRInvocationsLog(compiledLib); +} + +uint32_t NuriGetWaveInstructionCount(const std::vector &lines) { + // This is the instruction we'll insert into the shader if we detect dynamic + // resource indexing + const char *const waveActiveAllEqual = "call i1 @dx.op.waveActiveAllEqual"; + + uint32_t instCount = 0; + for (const std::string 
&line : lines) { + instCount += line.find(waveActiveAllEqual) != std::string::npos; + } + return instCount; +} + +void PixTest::TestNuriCase(const char *source, const wchar_t *target, + uint32_t expectedResult) { + + for (const OptimizationChoice &choice : OptimizationChoices) { + const std::vector compilationOptions = {choice.Flag}; + + CComPtr compiledLib = + Compile(m_dllSupport, source, target, compilationOptions); + + std::string outputText; + const std::vector dxilLines = + RunDxilNonUniformResourceIndexInstrumentation(compiledLib, outputText); + + VERIFY_ARE_EQUAL(NuriGetWaveInstructionCount(dxilLines), expectedResult); + + bool foundDynamicIndexingNoNuri = false; + const std::vector outputTextLines = Tokenize(outputText, "\n"); + for (const std::string &line : outputTextLines) { + if (line.find("FoundDynamicIndexingNoNuri") != std::string::npos) { + foundDynamicIndexingNoNuri = true; + break; + } + } + + VERIFY_ARE_EQUAL((expectedResult != 0), foundDynamicIndexingNoNuri); + } +} + +TEST_F(PixTest, NonUniformResourceIndex_Resource) { + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint index = uv.x * uv.y; + return tex[index].Load(int3(0, 0, 0)); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x * uv.y; + return tex[NonUniformResourceIndex(i)].Load(int3(0, 0, 0)); +})x"; + + TestNuriCase(source, L"ps_6_0", 1); + TestNuriCase(sourceWithNuri, L"ps_6_0", 0); + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + TestNuriCase(source, L"ps_6_6", 1); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_DescriptorHeap) { + + if (m_ver.SkipDxilVersion(1, 6)) { + return; + } + + const char *source = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[i]; + 
SamplerState dynResSampler = + SamplerDescriptorHeap[i]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + const char *sourceWithNuri = R"x( +Texture2D tex[] : register(t0); +float4 main(float2 uv : TEXCOORD0) : SV_TARGET +{ + uint i = uv.x + uv.y; + Texture2D dynResTex = + ResourceDescriptorHeap[NonUniformResourceIndex(i)]; + SamplerState dynResSampler = + SamplerDescriptorHeap[NonUniformResourceIndex(i)]; + return dynResTex.Sample(dynResSampler, uv); +})x"; + + TestNuriCase(source, L"ps_6_6", 2); + TestNuriCase(sourceWithNuri, L"ps_6_6", 0); +} + +TEST_F(PixTest, NonUniformResourceIndex_Raytracing) { + + if (m_ver.SkipDxilVersion(1, 5)) { + return; + } + + const char *source = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[i][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[i][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[i1][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[i2][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + const char *sourceWithNuri = R"x( +RWTexture2D RT[] : register(u0); + +[noinline] +void FuncNoInline(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.x * rayIndex.y; + float4 c = float4(0.5, 0.5, 0.5, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +void Func(uint index) +{ + float2 rayIndex = DispatchRaysIndex().xy; + uint i = index + rayIndex.y; + float4 c = float4(0, 1, 0, 0); + RT[NonUniformResourceIndex(i)][rayIndex.xy] += c; +} + +[shader("raygeneration")] +void Main() +{ + float2 rayIndex = 
DispatchRaysIndex().xy; + + uint i1 = rayIndex.x; + float4 c1 = float4(1, 0, 1, 1); + RT[NonUniformResourceIndex(i1)][rayIndex.xy] += c1; + + uint i2 = rayIndex.x * rayIndex.y * 0.25; + float4 c2 = float4(0.25, 0, 0.25, 0); + RT[NonUniformResourceIndex(i2)][rayIndex.xy] += c2; + + Func(i1); + FuncNoInline(i2); +})x"; + + TestNuriCase(source, L"lib_6_5", 4); + TestNuriCase(sourceWithNuri, L"lib_6_5", 0); +} + TEST_F(PixTest, DebugInstrumentation_TextOutput) { const char *source = R"x( diff --git a/tools/clang/unittests/HLSL/PixTestUtils.cpp b/tools/clang/unittests/HLSL/PixTestUtils.cpp index 91b6c4479c..61647ff5fa 100644 --- a/tools/clang/unittests/HLSL/PixTestUtils.cpp +++ b/tools/clang/unittests/HLSL/PixTestUtils.cpp @@ -397,7 +397,7 @@ CComPtr Compile(dxc::DxcDllSupport &dllSupport, const char *hlsl, CheckOperationSucceeded(pResult, &pProgram); CComPtr pLib; - VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); + VERIFY_SUCCEEDED(dllSupport.CreateInstance(CLSID_DxcLibrary, &pLib)); const hlsl::DxilContainerHeader *pContainer = hlsl::IsDxilContainerLike( pProgram->GetBufferPointer(), pProgram->GetBufferSize()); VERIFY_IS_NOT_NULL(pContainer); diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index f69b0be204..01f24e0227 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1506,21 +1506,23 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) { } TEST_F(ValidationTest, StructBufLoadCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 8)", - "bufferLoad.f32(i32 68, %dx.types.Handle " - "%buf1_texture_structbuf, i32 1, i32 undef)", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferLoad.f32(i32 68, %dx.types.Handle " + 
"%buf1_texture_structbuf, i32 1, i32 8)", + "bufferLoad.f32(i32 68, %dx.types.Handle " + "%buf1_texture_structbuf, i32 1, i32 undef)", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, StructBufStoreCoordinates) { - RewriteAssemblyCheckMsg(L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 0", - "bufferStore.f32(i32 69, %dx.types.Handle " - "%buf2_UAV_structbuf, i32 0, i32 undef", - "structured buffer require 2 coordinates"); + RewriteAssemblyCheckMsg( + L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 0", + "bufferStore.f32(i32 69, %dx.types.Handle " + "%buf2_UAV_structbuf, i32 0, i32 undef", + "structured buffer requires defined index and offset coordinates"); } TEST_F(ValidationTest, TypedBufRetType) { diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 7066247883..6db27d7a41 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -820,10 +820,10 @@ class ExecutionTest { return false; } - if (GetModuleHandle("d3d10warp.dll") != NULL) { - CHAR szFullModuleFilePath[MAX_PATH] = ""; - GetModuleFileName(GetModuleHandle("d3d10warp.dll"), - szFullModuleFilePath, sizeof(szFullModuleFilePath)); + if (GetModuleHandleW(L"d3d10warp.dll") != NULL) { + WCHAR szFullModuleFilePath[MAX_PATH] = L""; + GetModuleFileNameW(GetModuleHandleW(L"d3d10warp.dll"), + szFullModuleFilePath, sizeof(szFullModuleFilePath)); WEX::Logging::Log::Comment(WEX::Common::String().Format( L"WARP driver loaded from: %S", szFullModuleFilePath)); } @@ -5632,7 +5632,7 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test is creating the 
resource to run + // this callback is called when the test is creating the resource to run // the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(Name); @@ -6999,7 +6999,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7067,7 +7067,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7157,7 +7157,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7234,7 +7234,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); @@ -7314,7 +7314,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called 
when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -7424,7 +7424,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -7494,7 +7494,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -7554,7 +7554,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -7619,7 +7619,7 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -7707,7 +7707,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is 
creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -7777,7 +7777,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -7869,7 +7869,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -7948,7 +7948,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); @@ -8016,7 +8016,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); @@ -8091,7 +8091,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", - // this callbacked is called when the 
test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); @@ -8187,7 +8187,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); @@ -8264,7 +8264,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); @@ -8363,7 +8363,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); @@ -8948,7 +8948,7 @@ TEST_F(ExecutionTest, DotTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "DotOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SDotOp")); @@ -9240,7 +9240,7 @@ TEST_F(ExecutionTest, Msad4Test) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "Msad4", - // this 
callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SMsad4")); @@ -9342,7 +9342,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); @@ -9455,7 +9455,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { std::shared_ptr test = RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); @@ -9883,7 +9883,7 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( ++maskIndex) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "WaveIntrinsicsOp", - // this callbacked is called when the test + // this callback is called when the test // is creating the resource to run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); @@ -12609,7 +12609,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { std::shared_ptr test = RunShaderOpTestAfterParse( pDevice, m_support, "HelperLaneTestNoWave", - // this callbacked is called when the test is creating the resource to + // this callback is called when the test is creating the resource to // run the test [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); diff --git a/tools/clang/utils/check_cfc/setup.py b/tools/clang/utils/check_cfc/setup.py index 
b5fc473639..7405513f0a 100644 --- a/tools/clang/utils/check_cfc/setup.py +++ b/tools/clang/utils/check_cfc/setup.py @@ -8,10 +8,10 @@ import platform import sys if platform.system() == 'Windows': - print "Could not find py2exe. Please install then run setup.py py2exe." + print("Could not find py2exe. Please install then run setup.py py2exe.") raise else: - print "setup.py only required on Windows." + print("setup.py only required on Windows.") sys.exit(1) setup( diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt index 06db8176c9..6f3e07dcf2 100644 --- a/utils/git/requirements_formatting.txt +++ b/utils/git/requirements_formatting.txt @@ -18,7 +18,7 @@ charset-normalizer==3.2.0 # via requests click==8.1.7 # via black -cryptography==43.0.1 +cryptography==44.0.1 # via pyjwt darker==1.7.2 # via -r llvm/utils/git/requirements_formatting.txt.in diff --git a/utils/hct/CMakeLists.txt b/utils/hct/CMakeLists.txt new file mode 100644 index 0000000000..41e6b494e6 --- /dev/null +++ b/utils/hct/CMakeLists.txt @@ -0,0 +1,3 @@ +# generate hlsl_intrinsic_opcodes.json to preserve high level intrinsic opcodes +# This uses CODE_TAG because the file exists in the source tree. +add_hlsl_hctgen(HlslIntrinsicOpcodes OUTPUT hlsl_intrinsic_opcodes.json CODE_TAG) diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 7f7637b230..f1274fd308 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1,6 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // +// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +// All rights reserved. +// // See hctdb.py for the implementation of intrinsic file processing. 
// // Intrinsic declarations are grouped into namespaces that @@ -361,8 +364,8 @@ void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in // Return true if the current lane is a helper lane bool [[ro]] IsHelperLane(); -// HL Op for allocating ray query object that default constructor uses -uint [[hidden]] AllocateRayQuery(in uint flags); +// HL Op for allocating ray query object +uint [[hidden]] AllocateRayQuery(in uint flags, in uint rayqueryflags); resource [[hidden]] CreateResourceFromHeap(in uint index); @@ -393,7 +396,13 @@ void [[]] RawBufferStore(in u64 addr, in $funcT value); void [[]] RawBufferStore(in u64 addr, in $funcT value, in uint alignment); void [[]] ext_execution_mode(in uint mode, ...); void [[]] ext_execution_mode_id(in uint mode, ...); +$funcT2 [[]] static_pointer_cast(in VkBufferPointer ptr); +$funcT2 [[]] reinterpret_pointer_cast(in VkBufferPointer ptr); + +} namespace +namespace BufferPointerMethods { +$classT [[ro]] GetBufferContents(); } namespace // SPIRV Change Ends @@ -1089,6 +1098,45 @@ uint [[ro]] CommittedInstanceContributionToHitGroupIndex(); } namespace +// Shader Execution Reordering +namespace DxHitObjectMethods { + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeNop(); + DxHitObject [[static,class_prefix,min_sm=6.9]] MakeMiss(in uint RayFlags, in uint MissShaderIndex, in ray_desc Ray); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq); + DxHitObject [[static,class_prefix,min_sm=6.9]] FromRayQuery(in RayQuery rq, in uint HitKind, in udt Attributes); + DxHitObject [[static,class_prefix,min_sm=6.9]] TraceRay(in acceleration_struct AccelerationStructure, in uint RayFlags, in uint InstanceInclusionMask, in uint RayContributionToHitGroupIndex, in uint MultiplierForGeometryContributionToHitGroupIndex, in uint MissShaderIndex, in ray_desc Ray, inout udt Payload); + void [[static,class_prefix,min_sm=6.9]] Invoke(in DxHitObject ho, inout udt Payload); + bool 
[[rn,class_prefix,min_sm=6.9]] IsMiss(); + bool [[rn,class_prefix,min_sm=6.9]] IsHit(); + bool [[rn,class_prefix,min_sm=6.9]] IsNop(); + uint [[rn,class_prefix,min_sm=6.9]] GetRayFlags(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTMin(); + float [[rn,class_prefix,min_sm=6.9]] GetRayTCurrent(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetWorldRayDirection(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayOrigin(); + float<3> [[rn,class_prefix,min_sm=6.9]] GetObjectRayDirection(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetObjectToWorld4x3(); + float<3,4> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject3x4(); + float<4,3> [[rn,class_prefix,min_sm=6.9]] GetWorldToObject4x3(); + uint [[rn,class_prefix,min_sm=6.9]] GetGeometryIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetInstanceID(); + uint [[rn,class_prefix,min_sm=6.9]] GetPrimitiveIndex(); + uint [[rn,class_prefix,min_sm=6.9]] GetHitKind(); + uint [[rn,class_prefix,min_sm=6.9]] GetShaderTableIndex(); + $funcT [[class_prefix,min_sm=6.9]] GetAttributes(); + void [[class_prefix,min_sm=6.9]] SetShaderTableIndex(in uint RecordIndex); + uint [[ro,class_prefix,min_sm=6.9]] LoadLocalRootTableConstant(in uint RootConstantOffsetInBytes); +} namespace + +namespace DxIntrinsics { +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject); +void [[min_sm=6.9]] MaybeReorderThread(in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +void [[min_sm=6.9]] MaybeReorderThread(in DxHitObject HitObject, in uint CoherenceHint, in uint NumCoherenceHintBitsFromLSB); +} namespace + // Work Graphs objects and methods // EmptyNodeInput @@ -1136,4 +1184,3 @@ $classT [[]] SubpassLoad(in int sample) : subpassinputms_load; } namespace // SPIRV Change Ends - diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 
66376c3b9b..6344fb5849 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,5 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. +# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. +# All rights reserved. ############################################################################### # DXIL information. # ############################################################################### @@ -37,6 +39,30 @@ "array_local_ldst", ] +# These are the valid overload type characters for DXIL instructions. +# - "v" is for void, and can only be used alone. +# - "u" is for user defined type (UDT), and is mutually exclusive with the other +# types. +# - "o" is for an HLSL object type (e.g. Texture, Sampler, etc.), and is +# mutually exclusive with the other types. +# - "<" is for vector overloads, and may be followed by a set of supported +# component types. +# - If "<" is not followed by any component types, any preceding scalar types +# are used. +# - Vector component types are captured into a separate list during +# processing. +# - "," is used to separate multiple overload dimensions. +# - When used, only $x0, $x1, etc. are supported for overloaded parameter +# types. +# dxil_all_user_oload_chars must be kept in sync with the indices in +# hlsl::OP::TypeSlot in DxilOperations.h. +dxil_all_user_oload_chars = "hfd18wiluo<" +dxil_scalar_oload_chars = "hfd18wil" + +# Maximum number of overload dimensions supported through the extended overload +# in DXIL instructions. 
+dxil_max_overload_dims = 2 + class db_dxil_enum_value(object): "A representation for a value in an enumeration type" @@ -81,6 +107,7 @@ def __init__(self, name, **kwargs): self.ops = [] # the operands that this instruction takes self.is_allowed = True # whether this instruction is allowed in a DXIL program self.oload_types = "" # overload types if applicable + # Always call process_oload_types() after setting oload_types. self.fn_attr = "" # attribute shorthands: rn=does not access memory,ro=only reads from memory, self.is_deriv = False # whether this is some kind of derivative self.is_gradient = False # whether this requires a gradient calculation @@ -98,6 +125,9 @@ def __init__(self, name, **kwargs): self.is_reserved = self.dxil_class == "Reserved" self.shader_model_translated = () # minimum shader model required with translation by linker self.props = {} # extra properties + self.num_oloads = 0 # number of overloads for this instruction + if self.is_dxil_op: + self.process_oload_types() def __str__(self): return self.name @@ -105,6 +135,127 @@ def __str__(self): def fully_qualified_name(self): return "{}::{}".format(self.fully_qualified_name_prefix, self.name) + def process_oload_types(self): + if type(self.oload_types) is not str: + raise ValueError( + f"overload for '{self.name}' should be a string - use empty if n/a" + ) + # Early out for LLVM instructions + if not self.is_dxil_op: + return + + self.num_oloads = 0 + + # Early out for void overloads. + if self.oload_types == "v": + return + + if self.oload_types == "": + raise ValueError( + f"overload for '{self.name}' should not be empty - use void if n/a" + ) + if "v" in self.oload_types: + raise ValueError( + f"void overload should be exclusive to other types for '({self.name})'" + ) + + # Process oload_types for extended and vector overloads. 
+ # Contrived example: "hf<, dxil_max_overload_dims: + raise ValueError( + "Too many overload dimensions for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + + def check_duplicate_overloads(oloads): + if len(oloads) != len(set(oloads)): + raise ValueError( + "Duplicate overload types specified for DXIL op " + f"{self.name}: '{oloads}' in '{self.oload_types}'" + ) + + def check_overload_chars(oloads, valid_chars): + invalid_chars = set(oloads).difference(set(valid_chars)) + if invalid_chars: + raise ValueError( + "Invalid overload type character(s) used for DXIL op " + f"{self.name}: '{invalid_chars}' in '{oloads}' from " + f"'{self.oload_types}'" + ) + + for n, oloads in enumerate(oload_types): + if len(oloads) == 0: + raise ValueError( + f"Invalid empty overload type for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + check_overload_chars(oloads, dxil_all_user_oload_chars) + + # split at vector for component overloads, if vector specified + # without following components, use the scalar overloads that + # precede the vector character. + split = oloads.split("<") + if len(split) == 1: + # No vector overload. + continue + elif len(split) != 2: + raise ValueError( + f"Invalid vector overload for DXIL op {self.name}: " + f"{oloads} in '{self.oload_types}'" + ) + + # Split into scalar and vector component overloads. + scalars, vector_oloads = split + check_duplicate_overloads(scalars) + if not vector_oloads: + vector_oloads = scalars + else: + check_duplicate_overloads(vector_oloads) + if not vector_oloads: + raise ValueError( + "No scalar overload types provided with vector overload " + f"for DXIL op {self.name}: '{self.oload_types}'" + ) + check_overload_chars(vector_oloads, dxil_scalar_oload_chars) + oload_types[n] = scalars + "<" + vector_oloads + # Reconstruct overload string with default vector overloads. 
+ self.oload_types = ",".join(oload_types) + self.check_extended_oload_ops() + + def check_extended_oload_ops(self): + "Ensure ops has sequential extended overload references with $x0, $x1, etc." + if self.num_oloads < 2: + return + next_oload_idx = 0 + for i in self.ops: + if i.llvm_type.startswith("$x"): + if i.llvm_type != "$x" + str(next_oload_idx): + raise ValueError( + "Extended overloads are not sequentially referenced in " + f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" + ) + next_oload_idx += 1 + if next_oload_idx != self.num_oloads: + raise ValueError( + "Extended overloads are not referenced for all overload " + f"dimensions in DXIL op {self.name}: {next_oload_idx} != " + f"{self.num_oloads}" + ) + class db_dxil_metadata(object): "A representation for a metadata record" @@ -328,7 +479,7 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Dot" for ( i - ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore".split( + ) in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,TextureStoreSample,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore,RawBufferVectorLoad,RawBufferVectorStore".split( "," ): self.name_idx[i].category = "Resources" @@ -455,6 +606,8 @@ def populate_categories_and_models(self): for i in "RawBufferLoad,RawBufferStore".split(","): self.name_idx[i].shader_model = 6, 2 self.name_idx[i].shader_model_translated = 6, 0 + for i in "RawBufferVectorLoad,RawBufferVectorStore".split(","): + self.name_idx[i].shader_model = 6, 9 for i in "DispatchRaysIndex,DispatchRaysDimensions".split(","): self.name_idx[i].category = "Ray Dispatch Arguments" self.name_idx[i].shader_model = 6, 3 @@ -477,9 +630,7 @@ def populate_categories_and_models(self): "closesthit", ) for i in "GeometryIndex".split(","): - 
self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Raytracing object space uint System Values, raytracing tier 1.1" ) self.name_idx[i].shader_model = 6, 5 @@ -574,9 +725,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 3 self.name_idx[i].shader_stages = ("library", "intersection") for i in "CreateHandleForLib".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Library create handle from resource struct (like HL intrinsic)" ) self.name_idx[i].shader_model = 6, 3 @@ -699,6 +848,31 @@ def populate_categories_and_models(self): self.name_idx[i].category = "Extended Command Information" self.name_idx[i].shader_stages = ("vertex",) self.name_idx[i].shader_model = 6, 8 + for i in ( + "HitObject_MakeMiss,HitObject_MakeNop" + + ",HitObject_TraceRay,HitObject_Invoke" + + ",HitObject_FromRayQuery,HitObject_FromRayQueryWithAttrs" + + ",HitObject_IsMiss,HitObject_IsHit,HitObject_IsNop" + + ",HitObject_RayFlags,HitObject_RayTMin,HitObject_RayTCurrent,HitObject_GeometryIndex,HitObject_InstanceIndex,HitObject_InstanceID,HitObject_PrimitiveIndex,HitObject_HitKind,HitObject_ShaderTableIndex" + + ",HitObject_WorldRayOrigin,HitObject_WorldRayDirection,HitObject_ObjectRayOrigin,HitObject_ObjectRayDirection" + + ",HitObject_ObjectToWorld3x4,HitObject_WorldToObject3x4" + + ",HitObject_SetShaderTableIndex,HitObject_LoadLocalRootTableConstant,HitObject_Attributes" + ).split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + "closesthit", + "miss", + ) + for i in ("MaybeReorderThread").split(","): + self.name_idx[i].category = "Shader Execution Reordering" + self.name_idx[i].shader_model = 6, 9 + self.name_idx[i].shader_stages = ( + "library", + "raygeneration", + ) def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. 
@@ -1175,6 +1349,37 @@ def populate_llvm_instructions(self): self.add_llvm_instr( "OTHER", 53, "VAArg", "VAArgInst", "vaarg instruction", "", [] ) + + self.add_llvm_instr( + "OTHER", + 54, + "ExtractElement", + "ExtractElementInst", + "extracts from vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 55, + "InsertElement", + "InsertElementInst", + "inserts into vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 56, + "ShuffleVector", + "ShuffleVectorInst", + "Shuffle two vectors", + "", + [], + ) + self.add_llvm_instr( "OTHER", 57, @@ -1314,7 +1519,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1348,7 +1553,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1365,7 +1570,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the reverse bit pattern of the input value", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1412,7 +1617,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1430,7 +1635,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1485,7 +1690,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "hfd", + "hfd<", "rn", [ db_dxil_param( @@ -1502,7 +1707,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -1526,7 +1731,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs an integral " + i, - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "the operation 
result"), @@ -2419,7 +2624,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2437,7 +2642,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2455,7 +2660,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -2473,7 +2678,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf", + "hf<", "rn", [ db_dxil_param( @@ -5550,79 +5755,655 @@ def UFI(name, **mappings): next_op_idx = self.reserve_dxil_op_range("ReservedA", next_op_idx, 3) # Shader Execution Reordering - next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 31) + self.add_dxil_op( + "HitObject_TraceRay", + next_op_idx, + "HitObject_TraceRay", + "Analogous to TraceRay but without invoking CH/MS and returns the intermediate state as a HitObject", + "u", + "", + [ + db_dxil_param(0, "hit_object", "", "Resulting HitObject"), + db_dxil_param( + 2, + "res", + "accelerationStructure", + "Top-level acceleration structure to use", + ), + db_dxil_param( + 3, + "i32", + "rayFlags", + "Valid combination of Ray_flags", + ), + db_dxil_param( + 4, + "i32", + "instanceInclusionMask", + "Bottom 8 bits of InstanceInclusionMask are used to include/reject geometry instances based on the InstanceMask in each instance: if(!((InstanceInclusionMask & InstanceMask) & 0xff)) { ignore intersection }", + ), + db_dxil_param( + 5, + "i32", + "rayContributionToHitGroupIndex", + "Offset to add into Addressing calculations within shader tables for hit group indexing. 
Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 6, + "i32", + "multiplierForGeometryContributionToHitGroupIndex", + "Stride to multiply by per-geometry GeometryContributionToHitGroupIndex in Addressing calculations within shader tables for hit group indexing. Only the bottom 4 bits of this value are used", + ), + db_dxil_param( + 7, + "i32", + "missShaderIndex", + "Miss shader index in Addressing calculations within shader tables. Only the bottom 16 bits of this value are used", + ), + db_dxil_param(8, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(9, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(10, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(11, "f", "TMin", "Tmin of the ray"), + db_dxil_param(12, "f", "Direction_X", "Direction x of the ray"), + db_dxil_param(13, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(14, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(15, "f", "TMax", "Tmax of the ray"), + db_dxil_param( + 16, + "udt", + "payload", + "User-defined payload structure", + ), + ], + ) + next_op_idx += 1 - # Reserved block C - next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + self.add_dxil_op( + "HitObject_FromRayQuery", + next_op_idx, + "HitObject_FromRayQuery", + "Creates a new HitObject representing a committed hit from a RayQuery", + "v", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + ], + ) + next_op_idx += 1 - # Set interesting properties. 
- self.build_indices() - for ( - i - ) in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp,SampleCmpBias".split( - "," - ): - self.name_idx[i].is_gradient = True - for i in "DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","): - assert ( - self.name_idx[i].is_gradient == True - ), "all derivatives are marked as requiring gradients" - self.name_idx[i].is_deriv = True + self.add_dxil_op( + "HitObject_FromRayQueryWithAttrs", + next_op_idx, + "HitObject_FromRayQueryWithAttrs", + "Creates a new HitObject representing a committed hit from a RayQuery and committed attributes", + "u", + "ro", + [ + db_dxil_param( + 0, "hit_object", "", "HitObject created from RayQuery object" + ), + db_dxil_param(2, "i32", "rayQueryHandle", "RayQuery handle"), + db_dxil_param( + 3, + "i32", + "HitKind", + "User-specified value in range of 0-127 to identify the type of hit", + ), + db_dxil_param(4, "udt", "CommittedAttribs", "Committed attributes"), + ], + ) + next_op_idx += 1 - # TODO - some arguments are required to be immediate constants in DXIL, eg resource kinds; add this information - # consider - report instructions that are overloaded on a single type, then turn them into non-overloaded version of that type - self.verify_dense( - self.get_dxil_insts(), lambda x: x.dxil_opid, lambda x: x.name + self.add_dxil_op( + "HitObject_MakeMiss", + next_op_idx, + "HitObject_MakeMiss", + "Creates a new HitObject representing a miss", + "v", + "rn", + [ + db_dxil_param(0, "hit_object", "", "HitObject with a committed miss"), + db_dxil_param(2, "i32", "RayFlags", "ray flags"), + db_dxil_param(3, "i32", "MissShaderIndex", "Miss shader index"), + db_dxil_param(4, "f", "Origin_X", "Origin x of the ray"), + db_dxil_param(5, "f", "Origin_Y", "Origin y of the ray"), + db_dxil_param(6, "f", "Origin_Z", "Origin z of the ray"), + db_dxil_param(7, "f", "TMin", "Tmin of the ray"), + db_dxil_param(8, "f", "Direction_X", "Direction x of the ray"), + 
db_dxil_param(9, "f", "Direction_Y", "Direction y of the ray"), + db_dxil_param(10, "f", "Direction_Z", "Direction z of the ray"), + db_dxil_param(11, "f", "TMax", "Tmax of the ray"), + ], ) - for i in self.instr: - self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) - for i in self.instr: - if i.is_dxil_op: - assert i.oload_types != "", ( - "overload for DXIL operation %s should not be empty - use void if n/a" - % (i.name) - ) - assert i.oload_types == "v" or i.oload_types.find("v") < 0, ( - "void overload should be exclusive to other types (%s)" % i.name - ) - assert ( - type(i.oload_types) is str - ), "overload for %s should be a string - use empty if n/a" % (i.name) + next_op_idx += 1 - # Verify that all operations in each class have the same signature. - import itertools + self.add_dxil_op( + "HitObject_MakeNop", + next_op_idx, + "HitObject_MakeNop", + "Creates an empty nop HitObject", + "v", + "rn", + [db_dxil_param(0, "hit_object", "", "Empty nop HitObject")], + ) + next_op_idx += 1 - class_sort_func = lambda x, y: x < y - class_key_func = lambda x: x.dxil_class - instr_ordered_by_class = sorted( - [i for i in self.instr if i.is_dxil_op], key=class_key_func + self.add_dxil_op( + "HitObject_Invoke", + next_op_idx, + "HitObject_Invoke", + "Represents the invocation of the CH/MS shader represented by the HitObject", + "u", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "udt", + "payload", + "User-defined payload structure", + ), + ], ) - instr_grouped_by_class = itertools.groupby( - instr_ordered_by_class, key=class_key_func + next_op_idx += 1 + + self.add_dxil_op( + "MaybeReorderThread", + next_op_idx, + "MaybeReorderThread", + "Reorders the current thread", + "v", + "", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "coherenceHint", "Coherence hint"), + db_dxil_param( + 4, + "i32", + "numCoherenceHintBitsFromLSB", + "Num coherence hint bits 
from LSB", + ), + ], ) + next_op_idx += 1 - def calc_oload_sig(inst): - result = "" - for o in inst.ops: - result += o.llvm_type - return result + self.add_dxil_op( + "HitObject_IsMiss", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a miss", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 - for k, g in instr_grouped_by_class: - group = list(g) - if len(group) > 1: - first = group[0] - first_group = calc_oload_sig(first) - for other in group[1:]: - other_group = calc_oload_sig(other) - # TODO: uncomment assert when opcodes are fixed - # assert first_group == other_group, "overload signature %s for instruction %s differs from %s in %s" % (first.name, first_group, other.name, other_group) + self.add_dxil_op( + "HitObject_IsHit", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject is a NOP-HitObject", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 - def populate_extended_docs(self): - "Update the documentation with text from external files." 
- inst_starter = "* Inst: " - block_starter = "* BLOCK-BEGIN" - block_end = "* BLOCK-END" - thisdir = os.path.dirname(os.path.realpath(__file__)) + self.add_dxil_op( + "HitObject_IsNop", + next_op_idx, + "HitObject_StateScalar", + "Returns `true` if the HitObject represents a nop", + "1", + "rn", + [ + db_dxil_param(0, "i1", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayFlags", + next_op_idx, + "HitObject_StateScalar", + "Returns the ray flags set in the HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTMin", + next_op_idx, + "HitObject_StateScalar", + "Returns the TMin value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_RayTCurrent", + next_op_idx, + "HitObject_StateScalar", + "Returns the current T value set in the HitObject", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in world space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", 
is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayOrigin", + next_op_idx, + "HitObject_StateVector", + "Returns the ray origin in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectRayDirection", + next_op_idx, + "HitObject_StateVector", + "Returns the ray direction in object space", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "component", "component [0..2]", is_const=True), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ObjectToWorld3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the object to world space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], , relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_WorldToObject3x4", + next_op_idx, + "HitObject_StateMatrix", + "Returns the world to object space transformation matrix in 3x4 form", + "f", + "rn", + [ + db_dxil_param(0, "f", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, + "i32", + "row", + "row [0..2], relative to the element", + is_const=True, + ), + db_dxil_param( + 4, + "i32", + "col", + "column [0..3], relative to the element", + is_const=True, + ), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_GeometryIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the geometry index committed on hit", + "i", + "rn", + [ + 
db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_InstanceID", + next_op_idx, + "HitObject_StateScalar", + "Returns the instance id committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_PrimitiveIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the primitive index committed on hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_HitKind", + next_op_idx, + "HitObject_StateScalar", + "Returns the HitKind of the hit", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_ShaderTableIndex", + next_op_idx, + "HitObject_StateScalar", + "Returns the shader table index set for this HitObject", + "i", + "rn", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_SetShaderTableIndex", + next_op_idx, + "HitObject_SetShaderTableIndex", + "Returns a HitObject with updated shader table index", + "v", + "rn", + [ + db_dxil_param( + 0, "hit_object", "hitObject", "hit with shader table index set" + ), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "shaderTableIndex", "shader table index"), + ], + ) + 
next_op_idx += 1 + + self.add_dxil_op( + "HitObject_LoadLocalRootTableConstant", + next_op_idx, + "HitObject_LoadLocalRootTableConstant", + "Returns the root table constant for this HitObject and offset", + "v", + "ro", + [ + db_dxil_param(0, "i32", "", "operation result"), + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param(3, "i32", "offset", "offset"), + ], + ) + next_op_idx += 1 + + self.add_dxil_op( + "HitObject_Attributes", + next_op_idx, + "HitObject_Attributes", + "Returns the attributes set for this HitObject", + "u", + "amo", + [ + retvoid_param, + db_dxil_param(2, "hit_object", "hitObject", "hit"), + db_dxil_param( + 3, "udt", "attributes", "pointer to store the attributes to" + ), + ], + ) + next_op_idx += 1 + + next_op_idx = self.reserve_dxil_op_range("ReservedB", next_op_idx, 3, 28) + + # Reserved block C + next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + + # Long Vectors + self.add_dxil_op( + "RawBufferVectorLoad", + next_op_idx, + "RawBufferVectorLoad", + "reads from a raw buffer and structured buffer", + "hfwidl<", + "ro", + [ + db_dxil_param(0, "$r", "", "the loaded value"), + db_dxil_param(2, "res", "buf", "handle of Raw Buffer to load from"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param( + 5, + "i32", + "alignment", + "relative load access alignment", + is_const=True, + ), + ], + counters=("tex_load",), + ) + next_op_idx += 1 + + self.add_dxil_op( + "RawBufferVectorStore", + next_op_idx, + "RawBufferVectorStore", + "writes to a RWByteAddressBuffer or RWStructuredBuffer", + "hfwidl<", + "", + [ + db_dxil_param(0, "v", "", ""), + db_dxil_param(2, "res", "uav", "handle of UAV to store to"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte 
offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param(5, "$o", "value0", "value"), + db_dxil_param( + 6, + "i32", + "alignment", + "relative store access alignment", + is_const=True, + ), + ], + counters=("tex_store",), + ) + next_op_idx += 1 + + # End of DXIL 1.9 opcodes. + # NOTE!! Update and uncomment when DXIL 1.9 opcodes are finalized: + # self.set_op_count_for_version(1, 9, next_op_idx) + # assert next_op_idx == NNN, ( + # "NNN is expected next operation index but encountered %d and thus opcodes are broken" + # % next_op_idx + # ) + + # Set interesting properties. + self.build_indices() + for ( + i + ) in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp,SampleCmpBias".split( + "," + ): + self.name_idx[i].is_gradient = True + for i in "DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","): + assert ( + self.name_idx[i].is_gradient == True + ), "all derivatives are marked as requiring gradients" + self.name_idx[i].is_deriv = True + + # TODO - some arguments are required to be immediate constants in DXIL, eg resource kinds; add this information + # consider - report instructions that are overloaded on a single type, then turn them into non-overloaded version of that type + self.verify_dense( + self.get_dxil_insts(), lambda x: x.dxil_opid, lambda x: x.name + ) + for i in self.instr: + self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) + + # Verify that all operations in each class have the same signature. 
+ import itertools + + class_sort_func = lambda x, y: x < y + class_key_func = lambda x: x.dxil_class + instr_ordered_by_class = sorted( + [i for i in self.instr if i.is_dxil_op], key=class_key_func + ) + instr_grouped_by_class = itertools.groupby( + instr_ordered_by_class, key=class_key_func + ) + + def calc_oload_sig(inst): + result = "" + for o in inst.ops: + result += o.llvm_type + return result + + for k, g in instr_grouped_by_class: + group = list(g) + if len(group) > 1: + first = group[0] + first_group = calc_oload_sig(first) + for other in group[1:]: + other_group = calc_oload_sig(other) + # TODO: uncomment assert when opcodes are fixed + # assert first_group == other_group, "overload signature %s for instruction %s differs from %s in %s" % (first.name, first_group, other.name, other_group) + + def populate_extended_docs(self): + "Update the documentation with text from external files." + inst_starter = "* Inst: " + block_starter = "* BLOCK-BEGIN" + block_end = "* BLOCK-END" + thisdir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(thisdir, "hctdb_inst_docs.txt")) as ops_file: inst_name = "" inst_doc = "" @@ -6049,6 +6830,12 @@ def add_pass(name, type_name, doc, opts): "HLSL DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV", [{"n": "maxNumEntriesInLog", "t": "int", "c": 1}], ) + add_pass( + "hlsl-dxil-non-uniform-resource-index-instrumentation", + "DxilNonUniformResourceIndexInstrumentation", + "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", + [], + ) category_lib = "dxil_gen" @@ -6174,6 +6961,12 @@ def add_pass(name, type_name, doc, opts): "DXIL Lower createHandleForLib", [], ) + add_pass( + "hlsl-dxil-scalarize-vector-load-stores", + "DxilScalarizeVectorLoadStores", + "DXIL scalarize vector load/stores", + [], + ) add_pass( "hlsl-dxil-cleanup-dynamic-resource-handle", "DxilCleanupDynamicResourceHandle", @@ -7396,11 +8189,15 @@ def build_valrules(self): ) self.add_valrule( "Instr.CoordinateCountForRawTypedBuf", - 
"raw/typed buffer don't need 2 coordinates.", + "raw/typed buffer offset must be undef.", + ) + self.add_valrule( + "Instr.ConstAlignForRawBuf", + "Raw Buffer alignment value must be a constant.", ) self.add_valrule( "Instr.CoordinateCountForStructBuf", - "structured buffer require 2 coordinates.", + "structured buffer requires defined index and offset coordinates.", ) self.add_valrule( "Instr.MipLevelForGetDimension", @@ -7496,6 +8293,16 @@ def build_valrules(self): "Invalid use of completed record handle.", ) + # Shader Execution Reordering + self.add_valrule( + "Instr.UndefHitObject", + "HitObject is undef.", + ) + self.add_valrule( + "Instr.MayReorderThreadUndefCoherenceHintParam", + "Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.", + ) + # Some legacy rules: # - space is only supported for shader targets 5.1 and higher # - multiple rules regarding derivatives, which isn't a supported feature for DXIL @@ -8145,10 +8952,12 @@ def add_dxil_op_reserved(self, name, code_id): ) self.instr.append(i) - def reserve_dxil_op_range(self, group_name, start_id, count): + def reserve_dxil_op_range(self, group_name, start_id, count, start_reserved_id=0): "Reserve a range of dxil opcodes for future use; returns next id" for i in range(0, count): - self.add_dxil_op_reserved("{0}{1}".format(group_name, i), start_id + i) + self.add_dxil_op_reserved( + "{0}{1}".format(group_name, start_reserved_id + i), start_id + i + ) return start_id + count def get_instr_by_llvm_name(self, llvm_name): @@ -8208,6 +9017,9 @@ def __init__( unsigned_op, overload_idx, hidden, + min_shader_model, + static_member, + class_prefix, ): self.name = name # Function name self.idx = idx # Unique number within namespace @@ -8216,14 +9028,27 @@ def __init__( self.ns = ns # Function namespace self.ns_idx = ns_idx # Namespace index self.doc = doc # Documentation - id_prefix = "IOP" if ns == "Intrinsics" else "MOP" + id_prefix = "IOP" if ns.endswith("Intrinsics") else "MOP" + + 
class_name = None + if ns.endswith("Methods"): + class_name = ns[0 : -len("Methods")] + # SPIR-V Change Starts if ns == "VkIntrinsics": name = "Vk" + name self.name = "Vk" + self.name id_prefix = "IOP" # SPIR-V Change Ends - self.enum_name = "%s_%s" % (id_prefix, name) # enum name + if ns.startswith("Dx"): + if not class_prefix: + name = "Dx" + name + self.name = name + + if class_prefix: + self.enum_name = "%s_%s_%s" % (id_prefix, class_name, name) + else: + self.enum_name = "%s_%s" % (id_prefix, name) self.readonly = ro # Only read memory self.readnone = rn # Not read memory self.argmemonly = amo # Only accesses memory through argument pointers @@ -8235,6 +9060,13 @@ def __init__( overload_idx # Parameter determines the overload type, -1 means ret type ) self.hidden = hidden # Internal high-level op, not exposed to HLSL + # Encoded minimum shader model for this intrinsic + self.min_shader_model = 0 + if min_shader_model: + self.min_shader_model = (min_shader_model[0] << 4) | ( + min_shader_model[1] & 0x0F + ) + self.static_member = static_member # HLSL static member function self.key = ( ("%3d" % ns_idx) + "!" 
@@ -8247,6 +9079,8 @@ def __init__( self.vulkanSpecific = ns.startswith( "Vk" ) # Vulkan specific intrinsic - SPIRV change + self.opcode = None # high-level opcode assigned later + self.unsigned_opcode = None # unsigned high-level opcode if appicable class db_hlsl_namespace(object): @@ -8292,7 +9126,7 @@ def __init__( class db_hlsl(object): "A database of HLSL language data" - def __init__(self, intrinsic_defs): + def __init__(self, intrinsic_defs, opcode_data): self.base_types = { "bool": "LICOMPTYPE_BOOL", "int": "LICOMPTYPE_INT", @@ -8347,6 +9181,9 @@ def __init__(self, intrinsic_defs): "AnyNodeOutputRecord": "LICOMPTYPE_ANY_NODE_OUTPUT_RECORD", "GroupNodeOutputRecords": "LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS", "ThreadNodeOutputRecords": "LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS", + "DxHitObject": "LICOMPTYPE_HIT_OBJECT", + "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", + "RayQuery": "LICOMPTYPE_RAY_QUERY", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} @@ -8365,6 +9202,13 @@ def __init__(self, intrinsic_defs): self.populate_attributes() self.opcode_namespace = "hlsl::IntrinsicOp" + # Populate opcode data for HLSL intrinsics. + self.opcode_data = opcode_data + # If opcode data is empty, create the default structure. 
+ if not self.opcode_data: + self.opcode_data["IntrinsicOpCodes"] = {"Num_Intrinsics": 0} + self.assign_opcodes() + def create_namespaces(self): last_ns = None self.namespaces = {} @@ -8399,9 +9243,10 @@ def load_intrinsics(self, intrinsic_defs): r"""( sampler\w* | string | (?:RW)?(?:Texture\w*|ByteAddressBuffer) | - acceleration_struct | ray_desc | + acceleration_struct | ray_desc | RayQuery | DxHitObject | Node\w* | RWNode\w* | EmptyNode\w* | - AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* + AnyNodeOutput\w* | NodeOutputRecord\w* | GroupShared\w* | + VkBufferPointer $)""", flags=re.VERBOSE, ) @@ -8453,6 +9298,10 @@ def process_arg(desc, idx, done_args, intrinsic_name): template_id = "-3" component_id = "0" type_name = "void" + elif type_name == "$funcT2": + template_id = "-4" + component_id = "0" + type_name = "void" elif type_name == "...": assert idx != 0, "'...' can only be used in the parameter list" template_id = "-2" @@ -8581,6 +9430,8 @@ def do_object(m): template_id = "INTRIN_TEMPLATE_VARARGS" elif template_id == "-3": template_id = "INTRIN_TEMPLATE_FROM_FUNCTION" + elif template_id == "-4": + template_id = "INTRIN_TEMPLATE_FROM_FUNCTION_2" if component_id == "-1": component_id = "INTRIN_COMPTYPE_FROM_TYPE_ELT0" if component_id == "-2": @@ -8605,13 +9456,16 @@ def process_attr(attr): readonly = False # Only read memory readnone = False # Not read memory argmemonly = False # Only reads memory through pointer arguments + static_member = False # Static member function is_wave = False + class_prefix = False # Insert class name as enum_prefix # Is wave-sensitive unsigned_op = "" # Unsigned opcode if exist overload_param_index = ( -1 ) # Parameter determines the overload type, -1 means ret type. 
hidden = False + min_shader_model = (0, 0) for a in attrs: if a == "": continue @@ -8630,6 +9484,12 @@ def process_attr(attr): if a == "hidden": hidden = True continue + if a == "static": + static_member = True + continue + if a == "class_prefix": + class_prefix = True + continue assign = a.split("=") @@ -8644,6 +9504,24 @@ def process_attr(attr): if d == "overload": overload_param_index = int(v) continue + if d == "min_sm": + # min_sm is a string like "6.0" or "6.5" + # Convert to a tuple of integers (major, minor) + try: + major_minor = v.split(".") + if len(major_minor) != 2: + raise ValueError + major, minor = major_minor + major = int(major) + minor = int(minor) + # minor of 15 has special meaning, and larger values + # cannot be encoded in the version DWORD. + if major < 0 or minor < 0 or minor > 14: + raise ValueError + min_shader_model = (major, minor) + except ValueError: + assert False, "invalid min_sm: %s" % (v) + continue assert False, "invalid attr %s" % (a) return ( @@ -8654,6 +9532,9 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, + static_member, + class_prefix, ) current_namespace = None @@ -8701,6 +9582,9 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, + static_member, + class_prefix, ) = process_attr(attr) # Add an entry for this intrinsic. if bracket_cleanup_re.search(opts): @@ -8717,6 +9601,8 @@ def process_attr(attr): for in_arg in in_args: args.append(process_arg(in_arg, arg_idx, args, name)) arg_idx += 1 + if class_prefix: + assert current_namespace.endswith("Methods") # We have to process the return type description last # to match the compiler's handling of it and allow # the return type to match an input type. 
@@ -8739,6 +9625,9 @@ def process_attr(attr): unsigned_op, overload_param_index, hidden, + min_shader_model, + static_member, + class_prefix, ) ) num_entries += 1 @@ -8869,6 +9758,29 @@ def add_attr_arg(title_name, scope, args, doc): ) self.attributes = attributes + # Iterate through all intrinsics, assigning opcodes to each one. + # This uses the opcode_data to preserve already-assigned opcodes. + def assign_opcodes(self): + "Assign opcodes to the intrinsics." + IntrinsicOpDict = self.opcode_data["IntrinsicOpCodes"] + Num_Intrinsics = self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] + + def add_intrinsic(name): + nonlocal Num_Intrinsics + opcode = IntrinsicOpDict.setdefault(name, Num_Intrinsics) + if opcode == Num_Intrinsics: + Num_Intrinsics += 1 + return opcode + + sorted_intrinsics = sorted(self.intrinsics, key=lambda x: x.key) + for i in sorted_intrinsics: + i.opcode = add_intrinsic(i.enum_name) + for i in sorted_intrinsics: + if i.unsigned_op == "": + continue + i.unsigned_opcode = add_intrinsic(i.unsigned_op) + self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] = Num_Intrinsics + if __name__ == "__main__": db = db_dxil() diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 17eefd4918..f0d8b0ebae 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -18,6 +18,29 @@ def get_db_dxil(): return g_db_dxil +# opcode data contains fixed opcode assignments for HLSL intrinsics. +g_hlsl_opcode_data = None + + +def get_hlsl_opcode_data(): + global g_hlsl_opcode_data + if g_hlsl_opcode_data is None: + # Load the intrinsic opcodes from the JSON file. 
+ json_filepath = os.path.join( + os.path.dirname(__file__), "hlsl_intrinsic_opcodes.json" + ) + try: + with open(json_filepath, "r") as file: + g_hlsl_opcode_data = json.load(file) + except FileNotFoundError: + print(f"File not found: {json_filepath}") + except json.JSONDecodeError as e: + print(f"Error decoding JSON from {json_filepath}: {e}") + if not g_hlsl_opcode_data: + g_hlsl_opcode_data = {} + return g_hlsl_opcode_data + + g_db_hlsl = None @@ -26,10 +49,14 @@ def get_db_hlsl(): if g_db_hlsl is None: thisdir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(thisdir, "gen_intrin_main.txt"), "r") as f: - g_db_hlsl = db_hlsl(f) + g_db_hlsl = db_hlsl(f, get_hlsl_opcode_data()) return g_db_hlsl +def get_max_oload_dims(): + return f"const unsigned kDxilMaxOloadDims = {dxil_max_overload_dims};" + + def format_comment(prefix, val): "Formats a value with a line-comment prefix." result = "" @@ -486,26 +513,15 @@ def print_opfunc_props(self): OP=self.OP ) ) - print( - "// OpCode OpCode name, OpCodeClass OpCodeClass name, void, h, f, d, i1, i8, i16, i32, i64, udt, obj, function attribute" - ) - # Example formatted string: - # { OC::TempRegLoad, "TempRegLoad", OCC::TempRegLoad, "tempRegLoad", false, true, true, false, true, false, true, true, false, Attribute::ReadOnly, }, - # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 - # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 last_category = None - # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong, u(dt) - f = lambda i, c: "true" if i.oload_types.find(c) >= 0 else "false" lower_exceptions = { "CBufferLoad": "cbufferLoad", "CBufferLoadLegacy": "cbufferLoadLegacy", "GSInstanceID": "gsInstanceID", } - lower_fn = ( - lambda t: lower_exceptions[t] - if t in lower_exceptions - else t[:1].lower() + 
t[1:] + lower_fn = lambda t: ( + lower_exceptions[t] if t in lower_exceptions else t[:1].lower() + t[1:] ) attr_dict = { "": "None", @@ -516,35 +532,47 @@ def print_opfunc_props(self): "nr": "NoReturn", "wv": "None", } - attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + "," + attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + oload_to_mask = lambda oload: sum( + [1 << dxil_all_user_oload_chars.find(c) for c in oload] + ) + oloads_fn = lambda oloads: ( + "{" + ",".join(["{0x%x}" % m for m in oloads]) + "}" + ) for i in self.instrs: if last_category != i.category: if last_category != None: print("") - print( - " // {category:118} void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute".format( - category=i.category - ) - ) + if not i.is_reserved: + print(f" // {i.category}") last_category = i.category + scalar_masks = [] + vector_masks = [] + if i.num_oloads > 0: + for n, o in enumerate(i.oload_types.split(",")): + if "<" in o: + v = o.split("<") + scalar_masks.append(oload_to_mask(v[0] + "<")) + vector_masks.append(oload_to_mask(v[1])) + else: + scalar_masks.append(oload_to_mask(o)) + vector_masks.append(0) print( - " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} {classNameQuot:28} {{{v:>6},{h:>6},{f:>6},{d:>6},{b:>6},{e:>6},{w:>6},{i:>6},{l:>6},{u:>6},{o:>6}}}, {attr:20} }},".format( + ( + " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} " + + "{classNameQuot:28} {attr:20}, {num_oloads}, " + + "{scalar_masks:16}, {vector_masks:16} }}, " + + "// Overloads: {oloads}" + ).format( name=i.name + ",", quotName='"' + i.name + '",', className=i.dxil_class + ",", classNameQuot='"' + lower_fn(i.dxil_class) + '",', - v=f(i, "v"), - h=f(i, "h"), - f=f(i, "f"), - d=f(i, "d"), - b=f(i, "1"), - e=f(i, "8"), - w=f(i, "w"), - i=f(i, "i"), - l=f(i, "l"), - u=f(i, "u"), - o=f(i, "o"), attr=attr_fn(i), + num_oloads=i.num_oloads, + scalar_masks=oloads_fn(scalar_masks), + vector_masks=oloads_fn(vector_masks), + oloads=i.oload_types, 
OC=self.OC, OCC=self.OCC, ) @@ -599,6 +627,10 @@ def print_opfunc_table(self): "noderecordhandle": "A(pNodeRecordHandle);", "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", + "hit_object": "A(pHit);", + # Extended overload slots, extend as needed: + "$x0": "EXT(0);", + "$x1": "EXT(1);", } last_category = None for i in self.instrs: @@ -629,14 +661,24 @@ def print_opfunc_oload_type(self): obj_ty = "obj" vec_ty = "$vec" gsptr_ty = "$gsptr" + extended_ty = "$x" last_category = None index_dict = collections.OrderedDict() ptr_index_dict = collections.OrderedDict() single_dict = collections.OrderedDict() + # extended_dict collects overloads with multiple overload types + # grouped by the set of overload parameter indices. + extended_dict = collections.OrderedDict() struct_list = [] + extended_list = [] for instr in self.instrs: + if instr.num_oloads > 1: + # Process extended overloads separately. + extended_list.append(instr) + continue + ret_ty = instr.ops[0].llvm_type # Skip case return type is overload type if ret_ty == elt_ty: @@ -708,8 +750,7 @@ def print_opfunc_oload_type(self): "i": "IntegerType::get(Ctx, 32)", "l": "IntegerType::get(Ctx, 64)", "v": "Type::getVoidTy(Ctx)", - "u": "Type::getInt32PtrTy(Ctx)", - "o": "Type::getInt32PtrTy(Ctx)", + # No other types should be referenced here. } assert ty in type_code_texts, "llvm type %s is unknown" % (ty) ty_code = type_code_texts[ty] @@ -769,6 +810,61 @@ def print_opfunc_oload_type(self): line = line + "}" print(line) + for instr in extended_list: + # Collect indices for overloaded return and types, make a tuple of + # indices the key, and add the opcode to a list of opcodes for that + # key. Indices start with 0 for return type, and 1 for the first + # function parameter, which is the DXIL OpCode. + indices = [] + for index, op in enumerate(instr.ops): + # Skip dxil opcode. 
+ if op.pos == 1: + continue + + op_type = op.llvm_type + if op_type.startswith(extended_ty): + try: + extended_index = int(op_type[2:]) + except: + raise ValueError( + "Error parsing extended operand type " + + f"'{op_type}' for DXIL op '{instr.name}'" + ) + if extended_index != len(indices): + raise ValueError( + f"'$x{extended_index}' is not in sequential " + + f"order for DXIL op '{instr.name}'" + ) + indices.append(op.pos) + + if len(indices) != instr.num_oloads: + raise ValueError( + f"DXIL op {instr.name}: extended overload count " + + "mismatches the number of overload types" + ) + extended_dict.setdefault(tuple(indices), []).append(instr.name) + + def get_type_at_index(index): + if index == 0: + return "FT->getReturnType()" + return f"FT->getParamType({index - 1})" + + for index_tuple, opcodes in extended_dict.items(): + line = "" + for opcode in opcodes: + line = line + f"case OpCode::{opcode}:\n" + if index_tuple[-1] > 0: + line += ( + f" if (FT->getNumParams() < {index_tuple[-1]})\n" + + " return nullptr;\n" + ) + line += ( + " return llvm::StructType::get(Ctx, {" + + ", ".join([get_type_at_index(index) for index in index_tuple]) + + "});\n" + ) + print(line) + class db_valfns_gen: "A generator of validation functions." 
@@ -964,15 +1060,11 @@ def get_hlsl_intrinsics(): last_ns = "" ns_table = "" is_vk_table = False # SPIRV Change - id_prefix = "" arg_idx = 0 opcode_namespace = db.opcode_namespace for i in sorted(db.intrinsics, key=lambda x: x.key): if last_ns != i.ns: last_ns = i.ns - id_prefix = ( - "IOP" if last_ns == "Intrinsics" or last_ns == "VkIntrinsics" else "MOP" - ) # SPIRV Change if len(ns_table): result += ns_table + "};\n" # SPIRV Change Starts @@ -989,13 +1081,24 @@ def get_hlsl_intrinsics(): result += "#ifdef ENABLE_SPIRV_CODEGEN\n\n" # SPIRV Change Ends arg_idx = 0 - ns_table += " {(UINT)%s::%s_%s, %s, %s, %s, %d, %d, g_%s_Args%s},\n" % ( + flags = [] + if i.readonly: + flags.append("INTRIN_FLAG_READ_ONLY") + if i.readnone: + flags.append("INTRIN_FLAG_READ_NONE") + if i.wave: + flags.append("INTRIN_FLAG_IS_WAVE") + if i.static_member: + flags.append("INTRIN_FLAG_STATIC_MEMBER") + if flags: + flags = " | ".join(flags) + else: + flags = "0" + ns_table += " {(UINT)%s::%s, %s, 0x%x, %d, %d, g_%s_Args%s},\n" % ( opcode_namespace, - id_prefix, - i.name, - str(i.readonly).lower(), - str(i.readnone).lower(), - str(i.wave).lower(), + i.enum_name, + flags, + i.min_shader_model, i.overload_param_index, len(i.params), last_ns, @@ -1045,22 +1148,22 @@ def wrap_with_ifdef_if_vulkan_specific(intrinsic, text): def enum_hlsl_intrinsics(): db = get_db_hlsl() result = "" - enumed = [] + enumed = set() for i in sorted(db.intrinsics, key=lambda x: x.key): if i.enum_name not in enumed: - enumerant = " %s,\n" % (i.enum_name) - result += wrap_with_ifdef_if_vulkan_specific(i, enumerant) # SPIRV Change - enumed.append(i.enum_name) + result += " %s = %d,\n" % (i.enum_name, i.opcode) + enumed.add(i.enum_name) # unsigned result += " // unsigned\n" for i in sorted(db.intrinsics, key=lambda x: x.key): if i.unsigned_op != "": if i.unsigned_op not in enumed: - result += " %s,\n" % (i.unsigned_op) - enumed.append(i.unsigned_op) + result += " %s = %d,\n" % (i.unsigned_op, i.unsigned_opcode) + 
enumed.add(i.unsigned_op) - result += " Num_Intrinsics,\n" + Num_Intrinsics = get_hlsl_opcode_data()["IntrinsicOpCodes"]["Num_Intrinsics"] + result += " Num_Intrinsics = %d,\n" % (Num_Intrinsics) return result @@ -1570,6 +1673,7 @@ def get_highest_released_shader_model(): ) return result + def get_highest_shader_model(): result = """static const unsigned kHighestMajor = %d; static const unsigned kHighestMinor = %d;""" % ( @@ -1578,6 +1682,7 @@ def get_highest_shader_model(): ) return result + def get_dxil_version_minor(): return "const unsigned kDxilMinor = %d;" % highest_minor diff --git a/utils/hct/hctgen.py b/utils/hct/hctgen.py index dbb7e3a745..1421fbfad5 100755 --- a/utils/hct/hctgen.py +++ b/utils/hct/hctgen.py @@ -2,6 +2,7 @@ import argparse from hctdb_instrhelp import * from hctdb import * +import json import sys import os import CodeTags @@ -28,6 +29,7 @@ "DxilCounters", "DxilMetadata", "RDAT_LibraryTypes", + "HlslIntrinsicOpcodes", ], ) parser.add_argument("--output", required=True) @@ -232,6 +234,14 @@ def writeDxilPIXPasses(args): return 0 +def writeHlslIntrinsicOpcodes(args): + out = openOutput(args) + # get_db_hlsl() initializes the hlsl intrinsic database and opcode_data. 
+ get_db_hlsl() + json.dump(get_hlsl_opcode_data(), out, indent=2) + out.write("\n") + return 0 + args = parser.parse_args() if args.force_lf and args.force_crlf: eprint("--force-lf and --force-crlf are mutually exclusive, only pass one") diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json new file mode 100644 index 0000000000..d99b84b745 --- /dev/null +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -0,0 +1,395 @@ +{ + "IntrinsicOpCodes": { + "Num_Intrinsics": 390, + "IOP_AcceptHitAndEndSearch": 0, + "IOP_AddUint64": 1, + "IOP_AllMemoryBarrier": 2, + "IOP_AllMemoryBarrierWithGroupSync": 3, + "IOP_AllocateRayQuery": 4, + "IOP_Barrier": 5, + "IOP_CallShader": 6, + "IOP_CheckAccessFullyMapped": 7, + "IOP_CreateResourceFromHeap": 8, + "IOP_D3DCOLORtoUBYTE4": 9, + "IOP_DeviceMemoryBarrier": 10, + "IOP_DeviceMemoryBarrierWithGroupSync": 11, + "IOP_DispatchMesh": 12, + "IOP_DispatchRaysDimensions": 13, + "IOP_DispatchRaysIndex": 14, + "IOP_EvaluateAttributeAtSample": 15, + "IOP_EvaluateAttributeCentroid": 16, + "IOP_EvaluateAttributeSnapped": 17, + "IOP_GeometryIndex": 18, + "IOP_GetAttributeAtVertex": 19, + "IOP_GetRemainingRecursionLevels": 20, + "IOP_GetRenderTargetSampleCount": 21, + "IOP_GetRenderTargetSamplePosition": 22, + "IOP_GroupMemoryBarrier": 23, + "IOP_GroupMemoryBarrierWithGroupSync": 24, + "IOP_HitKind": 25, + "IOP_IgnoreHit": 26, + "IOP_InstanceID": 27, + "IOP_InstanceIndex": 28, + "IOP_InterlockedAdd": 29, + "IOP_InterlockedAnd": 30, + "IOP_InterlockedCompareExchange": 31, + "IOP_InterlockedCompareExchangeFloatBitwise": 32, + "IOP_InterlockedCompareStore": 33, + "IOP_InterlockedCompareStoreFloatBitwise": 34, + "IOP_InterlockedExchange": 35, + "IOP_InterlockedMax": 36, + "IOP_InterlockedMin": 37, + "IOP_InterlockedOr": 38, + "IOP_InterlockedXor": 39, + "IOP_IsHelperLane": 40, + "IOP_NonUniformResourceIndex": 41, + "IOP_ObjectRayDirection": 42, + "IOP_ObjectRayOrigin": 43, + "IOP_ObjectToWorld": 44, + 
"IOP_ObjectToWorld3x4": 45, + "IOP_ObjectToWorld4x3": 46, + "IOP_PrimitiveIndex": 47, + "IOP_Process2DQuadTessFactorsAvg": 48, + "IOP_Process2DQuadTessFactorsMax": 49, + "IOP_Process2DQuadTessFactorsMin": 50, + "IOP_ProcessIsolineTessFactors": 51, + "IOP_ProcessQuadTessFactorsAvg": 52, + "IOP_ProcessQuadTessFactorsMax": 53, + "IOP_ProcessQuadTessFactorsMin": 54, + "IOP_ProcessTriTessFactorsAvg": 55, + "IOP_ProcessTriTessFactorsMax": 56, + "IOP_ProcessTriTessFactorsMin": 57, + "IOP_QuadAll": 58, + "IOP_QuadAny": 59, + "IOP_QuadReadAcrossDiagonal": 60, + "IOP_QuadReadAcrossX": 61, + "IOP_QuadReadAcrossY": 62, + "IOP_QuadReadLaneAt": 63, + "IOP_RayFlags": 64, + "IOP_RayTCurrent": 65, + "IOP_RayTMin": 66, + "IOP_ReportHit": 67, + "IOP_SetMeshOutputCounts": 68, + "IOP_TraceRay": 69, + "IOP_WaveActiveAllEqual": 70, + "IOP_WaveActiveAllTrue": 71, + "IOP_WaveActiveAnyTrue": 72, + "IOP_WaveActiveBallot": 73, + "IOP_WaveActiveBitAnd": 74, + "IOP_WaveActiveBitOr": 75, + "IOP_WaveActiveBitXor": 76, + "IOP_WaveActiveCountBits": 77, + "IOP_WaveActiveMax": 78, + "IOP_WaveActiveMin": 79, + "IOP_WaveActiveProduct": 80, + "IOP_WaveActiveSum": 81, + "IOP_WaveGetLaneCount": 82, + "IOP_WaveGetLaneIndex": 83, + "IOP_WaveIsFirstLane": 84, + "IOP_WaveMatch": 85, + "IOP_WaveMultiPrefixBitAnd": 86, + "IOP_WaveMultiPrefixBitOr": 87, + "IOP_WaveMultiPrefixBitXor": 88, + "IOP_WaveMultiPrefixCountBits": 89, + "IOP_WaveMultiPrefixProduct": 90, + "IOP_WaveMultiPrefixSum": 91, + "IOP_WavePrefixCountBits": 92, + "IOP_WavePrefixProduct": 93, + "IOP_WavePrefixSum": 94, + "IOP_WaveReadLaneAt": 95, + "IOP_WaveReadLaneFirst": 96, + "IOP_WorldRayDirection": 97, + "IOP_WorldRayOrigin": 98, + "IOP_WorldToObject": 99, + "IOP_WorldToObject3x4": 100, + "IOP_WorldToObject4x3": 101, + "IOP_abort": 102, + "IOP_abs": 103, + "IOP_acos": 104, + "IOP_all": 105, + "IOP_and": 106, + "IOP_any": 107, + "IOP_asdouble": 108, + "IOP_asfloat": 109, + "IOP_asfloat16": 110, + "IOP_asin": 111, + "IOP_asint": 112, + 
"IOP_asint16": 113, + "IOP_asuint": 114, + "IOP_asuint16": 115, + "IOP_atan": 116, + "IOP_atan2": 117, + "IOP_ceil": 118, + "IOP_clamp": 119, + "IOP_clip": 120, + "IOP_cos": 121, + "IOP_cosh": 122, + "IOP_countbits": 123, + "IOP_cross": 124, + "IOP_ddx": 125, + "IOP_ddx_coarse": 126, + "IOP_ddx_fine": 127, + "IOP_ddy": 128, + "IOP_ddy_coarse": 129, + "IOP_ddy_fine": 130, + "IOP_degrees": 131, + "IOP_determinant": 132, + "IOP_distance": 133, + "IOP_dot": 134, + "IOP_dot2add": 135, + "IOP_dot4add_i8packed": 136, + "IOP_dot4add_u8packed": 137, + "IOP_dst": 138, + "IOP_exp": 139, + "IOP_exp2": 140, + "IOP_f16tof32": 141, + "IOP_f32tof16": 142, + "IOP_faceforward": 143, + "IOP_firstbithigh": 144, + "IOP_firstbitlow": 145, + "IOP_floor": 146, + "IOP_fma": 147, + "IOP_fmod": 148, + "IOP_frac": 149, + "IOP_frexp": 150, + "IOP_fwidth": 151, + "IOP_isfinite": 152, + "IOP_isinf": 153, + "IOP_isnan": 154, + "IOP_ldexp": 155, + "IOP_length": 156, + "IOP_lerp": 157, + "IOP_lit": 158, + "IOP_log": 159, + "IOP_log10": 160, + "IOP_log2": 161, + "IOP_mad": 162, + "IOP_max": 163, + "IOP_min": 164, + "IOP_modf": 165, + "IOP_msad4": 166, + "IOP_mul": 167, + "IOP_normalize": 168, + "IOP_or": 169, + "IOP_pack_clamp_s8": 170, + "IOP_pack_clamp_u8": 171, + "IOP_pack_s8": 172, + "IOP_pack_u8": 173, + "IOP_pow": 174, + "IOP_printf": 175, + "IOP_radians": 176, + "IOP_rcp": 177, + "IOP_reflect": 178, + "IOP_refract": 179, + "IOP_reversebits": 180, + "IOP_round": 181, + "IOP_rsqrt": 182, + "IOP_saturate": 183, + "IOP_select": 184, + "IOP_sign": 185, + "IOP_sin": 186, + "IOP_sincos": 187, + "IOP_sinh": 188, + "IOP_smoothstep": 189, + "IOP_source_mark": 190, + "IOP_sqrt": 191, + "IOP_step": 192, + "IOP_tan": 193, + "IOP_tanh": 194, + "IOP_tex1D": 195, + "IOP_tex1Dbias": 196, + "IOP_tex1Dgrad": 197, + "IOP_tex1Dlod": 198, + "IOP_tex1Dproj": 199, + "IOP_tex2D": 200, + "IOP_tex2Dbias": 201, + "IOP_tex2Dgrad": 202, + "IOP_tex2Dlod": 203, + "IOP_tex2Dproj": 204, + "IOP_tex3D": 205, + "IOP_tex3Dbias": 
206, + "IOP_tex3Dgrad": 207, + "IOP_tex3Dlod": 208, + "IOP_tex3Dproj": 209, + "IOP_texCUBE": 210, + "IOP_texCUBEbias": 211, + "IOP_texCUBEgrad": 212, + "IOP_texCUBElod": 213, + "IOP_texCUBEproj": 214, + "IOP_transpose": 215, + "IOP_trunc": 216, + "IOP_unpack_s8s16": 217, + "IOP_unpack_s8s32": 218, + "IOP_unpack_u8u16": 219, + "IOP_unpack_u8u32": 220, + "IOP_VkRawBufferLoad": 221, + "IOP_VkRawBufferStore": 222, + "IOP_VkReadClock": 223, + "IOP_Vkext_execution_mode": 224, + "IOP_Vkext_execution_mode_id": 225, + "MOP_Append": 226, + "MOP_RestartStrip": 227, + "MOP_CalculateLevelOfDetail": 228, + "MOP_CalculateLevelOfDetailUnclamped": 229, + "MOP_GetDimensions": 230, + "MOP_Load": 231, + "MOP_Sample": 232, + "MOP_SampleBias": 233, + "MOP_SampleCmp": 234, + "MOP_SampleCmpBias": 235, + "MOP_SampleCmpGrad": 236, + "MOP_SampleCmpLevel": 237, + "MOP_SampleCmpLevelZero": 238, + "MOP_SampleGrad": 239, + "MOP_SampleLevel": 240, + "MOP_Gather": 241, + "MOP_GatherAlpha": 242, + "MOP_GatherBlue": 243, + "MOP_GatherCmp": 244, + "MOP_GatherCmpAlpha": 245, + "MOP_GatherCmpBlue": 246, + "MOP_GatherCmpGreen": 247, + "MOP_GatherCmpRed": 248, + "MOP_GatherGreen": 249, + "MOP_GatherRaw": 250, + "MOP_GatherRed": 251, + "MOP_GetSamplePosition": 252, + "MOP_Load2": 253, + "MOP_Load3": 254, + "MOP_Load4": 255, + "MOP_InterlockedAdd": 256, + "MOP_InterlockedAdd64": 257, + "MOP_InterlockedAnd": 258, + "MOP_InterlockedAnd64": 259, + "MOP_InterlockedCompareExchange": 260, + "MOP_InterlockedCompareExchange64": 261, + "MOP_InterlockedCompareExchangeFloatBitwise": 262, + "MOP_InterlockedCompareStore": 263, + "MOP_InterlockedCompareStore64": 264, + "MOP_InterlockedCompareStoreFloatBitwise": 265, + "MOP_InterlockedExchange": 266, + "MOP_InterlockedExchange64": 267, + "MOP_InterlockedExchangeFloat": 268, + "MOP_InterlockedMax": 269, + "MOP_InterlockedMax64": 270, + "MOP_InterlockedMin": 271, + "MOP_InterlockedMin64": 272, + "MOP_InterlockedOr": 273, + "MOP_InterlockedOr64": 274, + 
"MOP_InterlockedXor": 275, + "MOP_InterlockedXor64": 276, + "MOP_Store": 277, + "MOP_Store2": 278, + "MOP_Store3": 279, + "MOP_Store4": 280, + "MOP_DecrementCounter": 281, + "MOP_IncrementCounter": 282, + "MOP_Consume": 283, + "MOP_WriteSamplerFeedback": 284, + "MOP_WriteSamplerFeedbackBias": 285, + "MOP_WriteSamplerFeedbackGrad": 286, + "MOP_WriteSamplerFeedbackLevel": 287, + "MOP_Abort": 288, + "MOP_CandidateGeometryIndex": 289, + "MOP_CandidateInstanceContributionToHitGroupIndex": 290, + "MOP_CandidateInstanceID": 291, + "MOP_CandidateInstanceIndex": 292, + "MOP_CandidateObjectRayDirection": 293, + "MOP_CandidateObjectRayOrigin": 294, + "MOP_CandidateObjectToWorld3x4": 295, + "MOP_CandidateObjectToWorld4x3": 296, + "MOP_CandidatePrimitiveIndex": 297, + "MOP_CandidateProceduralPrimitiveNonOpaque": 298, + "MOP_CandidateTriangleBarycentrics": 299, + "MOP_CandidateTriangleFrontFace": 300, + "MOP_CandidateTriangleRayT": 301, + "MOP_CandidateType": 302, + "MOP_CandidateWorldToObject3x4": 303, + "MOP_CandidateWorldToObject4x3": 304, + "MOP_CommitNonOpaqueTriangleHit": 305, + "MOP_CommitProceduralPrimitiveHit": 306, + "MOP_CommittedGeometryIndex": 307, + "MOP_CommittedInstanceContributionToHitGroupIndex": 308, + "MOP_CommittedInstanceID": 309, + "MOP_CommittedInstanceIndex": 310, + "MOP_CommittedObjectRayDirection": 311, + "MOP_CommittedObjectRayOrigin": 312, + "MOP_CommittedObjectToWorld3x4": 313, + "MOP_CommittedObjectToWorld4x3": 314, + "MOP_CommittedPrimitiveIndex": 315, + "MOP_CommittedRayT": 316, + "MOP_CommittedStatus": 317, + "MOP_CommittedTriangleBarycentrics": 318, + "MOP_CommittedTriangleFrontFace": 319, + "MOP_CommittedWorldToObject3x4": 320, + "MOP_CommittedWorldToObject4x3": 321, + "MOP_Proceed": 322, + "MOP_RayFlags": 323, + "MOP_RayTMin": 324, + "MOP_TraceRayInline": 325, + "MOP_WorldRayDirection": 326, + "MOP_WorldRayOrigin": 327, + "MOP_Count": 328, + "MOP_FinishedCrossGroupSharing": 329, + "MOP_GetGroupNodeOutputRecords": 330, + 
"MOP_GetThreadNodeOutputRecords": 331, + "MOP_IsValid": 332, + "MOP_GroupIncrementOutputCount": 333, + "MOP_ThreadIncrementOutputCount": 334, + "MOP_OutputComplete": 335, + "MOP_SubpassLoad": 336, + "IOP_InterlockedUMax": 337, + "IOP_InterlockedUMin": 338, + "IOP_WaveActiveUMax": 339, + "IOP_WaveActiveUMin": 340, + "IOP_WaveActiveUProduct": 341, + "IOP_WaveActiveUSum": 342, + "IOP_WaveMultiPrefixUProduct": 343, + "IOP_WaveMultiPrefixUSum": 344, + "IOP_WavePrefixUProduct": 345, + "IOP_WavePrefixUSum": 346, + "IOP_uabs": 347, + "IOP_uclamp": 348, + "IOP_udot": 349, + "IOP_ufirstbithigh": 350, + "IOP_umad": 351, + "IOP_umax": 352, + "IOP_umin": 353, + "IOP_umul": 354, + "IOP_usign": 355, + "MOP_InterlockedUMax": 356, + "MOP_InterlockedUMin": 357, + "MOP_DxHitObject_MakeNop": 358, + "IOP_DxMaybeReorderThread": 359, + "IOP_Vkreinterpret_pointer_cast": 360, + "IOP_Vkstatic_pointer_cast": 361, + "MOP_GetBufferContents": 362, + "MOP_DxHitObject_FromRayQuery": 363, + "MOP_DxHitObject_GetAttributes": 364, + "MOP_DxHitObject_GetGeometryIndex": 365, + "MOP_DxHitObject_GetHitKind": 366, + "MOP_DxHitObject_GetInstanceID": 367, + "MOP_DxHitObject_GetInstanceIndex": 368, + "MOP_DxHitObject_GetObjectRayDirection": 369, + "MOP_DxHitObject_GetObjectRayOrigin": 370, + "MOP_DxHitObject_GetObjectToWorld3x4": 371, + "MOP_DxHitObject_GetObjectToWorld4x3": 372, + "MOP_DxHitObject_GetPrimitiveIndex": 373, + "MOP_DxHitObject_GetRayFlags": 374, + "MOP_DxHitObject_GetRayTCurrent": 375, + "MOP_DxHitObject_GetRayTMin": 376, + "MOP_DxHitObject_GetShaderTableIndex": 377, + "MOP_DxHitObject_GetWorldRayDirection": 378, + "MOP_DxHitObject_GetWorldRayOrigin": 379, + "MOP_DxHitObject_GetWorldToObject3x4": 380, + "MOP_DxHitObject_GetWorldToObject4x3": 381, + "MOP_DxHitObject_Invoke": 382, + "MOP_DxHitObject_IsHit": 383, + "MOP_DxHitObject_IsMiss": 384, + "MOP_DxHitObject_IsNop": 385, + "MOP_DxHitObject_LoadLocalRootTableConstant": 386, + "MOP_DxHitObject_MakeMiss": 387, + 
"MOP_DxHitObject_SetShaderTableIndex": 388, + "MOP_DxHitObject_TraceRay": 389 + } +}